1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-01 06:46:55 +03:00

MCOL-523 Add UDAF and UDAnF SDK

This commit is contained in:
David Hall
2017-07-26 11:53:08 -05:00
parent 630b113565
commit bc2a4e7795
75 changed files with 10250 additions and 4523 deletions

4
utils/udfsdk/CMakeLists.txt Normal file → Executable file
View File

@ -4,13 +4,13 @@ include_directories( ${ENGINE_COMMON_INCLUDES}
########### next target ###############
set(udfsdk_LIB_SRCS udfsdk.cpp)
set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp)
add_definitions(-DMYSQL_DYNAMIC_PLUGIN)
add_library(udfsdk SHARED ${udfsdk_LIB_SRCS})
set_target_properties(udfsdk PROPERTIES VERSION 1.0.0 SOVERSION 1)
set_target_properties(udfsdk PROPERTIES VERSION 1.1.0 SOVERSION 1)
install(TARGETS udfsdk DESTINATION ${ENGINE_LIBDIR} COMPONENT libs)

96
utils/udfsdk/allnull.cpp Executable file
View File

@ -0,0 +1,96 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include "allnull.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
// Running counters for one allnull() aggregation. This struct is overlaid on
// the byte buffer obtained from context->getUserData()->data; init() requests
// sizeof(allnull_data) bytes for it through setUserDataSize().
struct allnull_data
{
uint64_t totalQuantity; // incremented by nextValue() for every parameter value it visits
uint64_t totalNulls; // incremented by nextValue() whenever it counts a NULL
};
#define OUT_TYPE int64_t
/**
 * Prepare an allnull() call.
 *
 * Requests a user data buffer large enough for allnull_data, rejects
 * a call that has no arguments and otherwise declares the result
 * type as TINYINT.
 *
 * context (in/out) - receives the user data size, result type and,
 *                    on failure, the error message.
 * colTypes (in)    - one entry per argument of the call.
 *
 * Returns mcsv1_UDAF::ERROR when colTypes is empty, else
 * mcsv1_UDAF::SUCCESS.
 */
mcsv1_UDAF::ReturnCode allnull::init(mcsv1Context* context,
                                     COL_TYPES& colTypes)
{
    // The buffer size is registered first, exactly as before, so it is set
    // even when the argument check below fails.
    context->setUserDataSize(sizeof(allnull_data));

    if (!colTypes.empty())
    {
        context->setResultType(CalpontSystemCatalog::TINYINT);
        return mcsv1_UDAF::SUCCESS;
    }

    // The error message will be prepended with
    // "The storage engine for the table doesn't support "
    context->setErrorMessage("allnull() with 0 arguments");
    return mcsv1_UDAF::ERROR;
}
/**
 * End of the SQL statement. allnull() keeps all of its state in the
 * framework-owned user data, so there is nothing to release here.
 *
 * Always returns mcsv1_UDAF::SUCCESS.
 */
mcsv1_UDAF::ReturnCode allnull::finish(mcsv1Context* /* context */)
{
    return mcsv1_UDAF::SUCCESS;
}
/**
 * Start a new aggregation: zero both counters held in the context's
 * user data buffer.
 *
 * Always returns mcsv1_UDAF::SUCCESS.
 */
mcsv1_UDAF::ReturnCode allnull::reset(mcsv1Context* context)
{
    allnull_data* counters =
        reinterpret_cast<allnull_data*>(context->getUserData()->data);

    counters->totalNulls    = 0;
    counters->totalQuantity = 0;

    return mcsv1_UDAF::SUCCESS;
}
/**
 * Account for one input row.
 *
 * Every parameter of the call is counted as one value, and each
 * parameter that is NULL in this row is counted as one NULL.
 *
 * context (in/out) - supplies the parameter count and per-parameter
 *                    NULL flags; its user data holds the counters.
 * valsIn (in)      - the row's parameter values. Unused: only the
 *                    NULL flags kept in the context matter here.
 *
 * Always returns mcsv1_UDAF::SUCCESS.
 */
mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context,
                                          std::vector<ColumnDatum>& valsIn)
{
    struct allnull_data* data = (struct allnull_data*)context->getUserData()->data;
    const size_t paramCount = context->getParameterCount();

    for (size_t i = 0; i < paramCount; i++)
    {
        data->totalQuantity++;

        // Test the parameter being visited. The earlier code always tested
        // parameter 0, so NULLs in any other parameter were miscounted.
        if (context->isParamNull(static_cast<int>(i)))
        {
            data->totalNulls++;
        }
    }

    return mcsv1_UDAF::SUCCESS;
}
/**
 * Fold one partial aggregation into this context's totals.
 *
 * context (in/out) - its user data holds the totals being built.
 * userDataIn (in)  - counters from a partial aggregation, laid out
 *                    as allnull_data.
 *
 * Always returns mcsv1_UDAF::SUCCESS.
 */
mcsv1_UDAF::ReturnCode allnull::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
    allnull_data* totals =
        reinterpret_cast<allnull_data*>(context->getUserData()->data);
    const allnull_data* partial =
        reinterpret_cast<const allnull_data*>(userDataIn->data);

    totals->totalQuantity += partial->totalQuantity;
    totals->totalNulls    += partial->totalNulls;

    return mcsv1_UDAF::SUCCESS;
}
/**
 * Produce the result for the current aggregation.
 *
 * valOut (out) - set to 1 when at least one value was counted and
 *                every counted value was NULL, otherwise 0. The value
 *                is stored as OUT_TYPE.
 *
 * Always returns mcsv1_UDAF::SUCCESS.
 */
mcsv1_UDAF::ReturnCode allnull::evaluate(mcsv1Context* context, static_any::any& valOut)
{
    const allnull_data* counters =
        reinterpret_cast<const allnull_data*>(context->getUserData()->data);

    const bool sawOnlyNulls = counters->totalQuantity > 0
                              && counters->totalNulls == counters->totalQuantity;

    OUT_TYPE result = sawOnlyNulls;
    valOut = result;

    return mcsv1_UDAF::SUCCESS;
}

225
utils/udfsdk/allnull.h Executable file
View File

@ -0,0 +1,225 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* allnull.h
***********************************************************************/
/**
* Columnstore interface for writing a User Defined Aggregate
* Functions (UDAF) and User Defined Analytic Functions (UDAnF)
* or a function that can act as either - UDA(n)F
*
* The basic steps are:
*
* 1. Create the UDA(n)F function interface in some .h file.
* 2. Create the UDA(n)F function implementation in some .cpp file.
* 3. Create the connector stub (MariaDB UDAF definition) for
* this UDA(n)F function.
* 4. Build the dynamic library using all of the source.
* 5. Put the library in $COLUMNSTORE_INSTALL/lib of
* all modules.
* 6. Restart the Columnstore system.
* 7. Notify mysqld about the new functions with commands like:
*
* // An example of xor over a range for UDAF and UDAnF
* CREATE AGGREGATE FUNCTION mcs_bit_xor returns BOOL soname
* 'libudfsdk.so';
*
* // An example that only makes sense as a UDAnF
* CREATE AGGREGATE FUNCTION mcs_interpolate returns REAL
* soname 'libudfsdk.so';
*
* The UDAF functions may run distributed in the Columnstore
* engine. UDAnF do not run distributed.
*
* UDAF is User Defined Aggregate Function.
* UDAnF is User Defined Analytic Function.
* UDA(n)F is an acronym for a function that could be either. It
* is also used to describe the interface that is used for
* either.
*/
#ifndef HEADER_allnull
#define HEADER_allnull
#include <cstdlib>
#include <string>
#include <vector>
#include <boost/any.hpp>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{
// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or
// User Defined Analytic Function (UDAnF).
// These will be singleton classes, so don't put any instance
// specific data in here. All instance data is stored in mcsv1Context
// passed to each user function and retrieved by the getUserData() method.
//
// Each API function returns a ReturnCode. If ERROR is returned at any time,
// the query is aborted, getInterrupted() will begin to return true and the
// message set in context->setErrorMessage() is returned to MariaDB.
// allnull: example UDAF whose result says whether every value it
// aggregated was NULL.
class allnull : public mcsv1_UDAF
{
public:
// Defaults OK
allnull() : mcsv1_UDAF(){};
virtual ~allnull(){};
/**
* init()
*
* Mandatory. Implement this to initialize flags and instance
* data. Called once per SQL statement. You can do any sanity
* checks here.
*
* colTypes (in) - A vector of ColDataType defining the
* parameters of the UDA(n)F call. These can be used to decide
* to override the default return type. If desired, the new
* return type can be set by context->setResultType() and the
* decimal characteristics by context->setScale() and
* context->setPrecision().
*
* Return mcsv1_UDAF::ERROR on any error, such as non-compatible
* colTypes or wrong number of arguments. Else return
* mcsv1_UDAF::SUCCESS.
*/
virtual ReturnCode init(mcsv1Context* context,
COL_TYPES& colTypes);
/**
* finish()
*
* Mandatory. Completes the UDA(n)F. Called once per SQL
* statement. Do not free any memory allocated by
* context->setUserDataSize(). The SDK Framework owns that memory
* and will handle that. Often, there is nothing to do here.
*/
virtual ReturnCode finish(mcsv1Context* context);
/**
* reset()
*
* Mandatory. Reset the UDA(n)F for a new group, partition or,
* in some cases, new Window Frame. Do not free any memory
* allocated by context->setUserDataSize(). The SDK Framework owns
* that memory and will handle that. Use this opportunity to
* reset any variables in context->getUserData() needed for the
* next aggregation. May be called multiple times if running in
* a distributed fashion.
*
* Use this opportunity to initialize the userData.
*/
virtual ReturnCode reset(mcsv1Context* context);
/**
* nextValue()
*
* Mandatory. Handle a single row.
*
* This function is called once for every row in the filtered
* result set (before aggregation). It is very important that
* this function is efficient.
*
* If the UDAF is running in a distributed fashion, nextValue
* cannot depend on order, as it will only be called for each
* row found on the specific PM.
*
* valsIn (in) - a vector of the parameters from the row, one
* data structure describing each input value.
*/
virtual ReturnCode nextValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsIn);
/**
* subEvaluate()
*
* Mandatory -- Called if the UDAF is running in a distributed
* fashion. Columnstore tries to run all aggregate functions
* distributed, depending on context.
*
* Perform an aggregation on rows partially aggregated by
* nextValue. Columnstore calls nextValue for each row on a
* given PM for a group (GROUP BY). subEvaluate is called on the
* UM to consolidate those values into a single instance of
* userData. Keep your aggregated totals in context's userData.
* The first time this is called for a group, reset() would have
* been called with this version of userData.
*
* Called for every partial data set in each group in GROUP BY.
*
* When subEvaluate has been called for all subAggregated data
* sets, evaluate will be called with the same context as here.
*
* userDataIn (in) - This is a pointer to a memory block of the
* size set in setUserDataSize. It will contain the value of
* userData as seen in the last call to nextValue for a given PM.
*
*/
virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn);
/**
* evaluate()
*
* Mandatory. Get the aggregated value.
*
* Called for every new group if UDAF GROUP BY, UDAnF partition
* or, in some cases, new Window Frame.
*
* If the UDAF is running in a distributed fashion, evaluate is
* called after a series of subEvaluate calls.
*
* valOut (out) - Set the aggregated value here. The datatype is
* assumed to be the same as that set in the init() function.
*
* To return a NULL value, don't assign to valOut.
*/
virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
protected:
};
}; // namespace
#undef EXPORT
#endif // HEADER_allnull.h

258
utils/udfsdk/mcsv1_udaf.cpp Executable file
View File

@ -0,0 +1,258 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include <stdexcept>
#include "mcsv1_udaf.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
/**
* All UDA(n)F functions must be registered in the function map.
* They will be picked up by the Columnstore modules during
* startup.
*
* This is a temporary kludge until we get the library loader
* task complete
*/
UDAF_MAP UDAFMap::fm;
#include "allnull.h"
#include "ssq.h"
#include "median.h"
/**
 * Return the name -> function object registry, filling it the first
 * time it is found empty.
 */
UDAF_MAP& UDAFMap::getMap()
{
    if (fm.empty())
    {
        // key:   function name
        // value: function object
        // Please use lower case for the function name, because the names
        // might be case-insensitive in MySQL depending on the setting. In
        // such case, the function names passed to the interface are always
        // in lower case.
        fm["allnull"] = new allnull();
        fm["ssq"] = new ssq();
        fm["median"] = new median();
    }

    return fm;
}
/**
 * Width in bytes of the result column.
 *
 * A positive stored width is returned untouched. Otherwise the width
 * is derived from the result type for the types listed below and
 * cached in fColWidth; any other type leaves fColWidth as it was.
 */
int32_t mcsv1Context::getColWidth()
{
    if (fColWidth <= 0)
    {
        // JIT initialization for types that have a defined size.
        switch (fResultType)
        {
            // one byte
            case CalpontSystemCatalog::BIT:
            case CalpontSystemCatalog::TINYINT:
            case CalpontSystemCatalog::UTINYINT:
            case CalpontSystemCatalog::CHAR:
                fColWidth = 1;
                break;

            // two bytes
            case CalpontSystemCatalog::SMALLINT:
            case CalpontSystemCatalog::USMALLINT:
                fColWidth = 2;
                break;

            // four bytes
            case CalpontSystemCatalog::MEDINT:
            case CalpontSystemCatalog::INT:
            case CalpontSystemCatalog::UMEDINT:
            case CalpontSystemCatalog::UINT:
            case CalpontSystemCatalog::FLOAT:
            case CalpontSystemCatalog::UFLOAT:
            case CalpontSystemCatalog::DATE:
                fColWidth = 4;
                break;

            // eight bytes
            case CalpontSystemCatalog::BIGINT:
            case CalpontSystemCatalog::UBIGINT:
            case CalpontSystemCatalog::DECIMAL:
            case CalpontSystemCatalog::UDECIMAL:
            case CalpontSystemCatalog::DOUBLE:
            case CalpontSystemCatalog::UDOUBLE:
            case CalpontSystemCatalog::DATETIME:
            case CalpontSystemCatalog::STRINT:
                fColWidth = 8;
                break;

            case CalpontSystemCatalog::LONGDOUBLE:
                fColWidth = sizeof(long double);
                break;

            default:
                break;
        }
    }

    return fColWidth;
}
/**
 * Two contexts are equal when their name, flags, user data size,
 * result description, partition row count and window frame settings
 * all match.
 *
 * We don't test the per row data fields. They don't determine
 * if it's the same Context.
 */
bool mcsv1Context::operator==(const mcsv1Context& c) const
{
    return getName() == c.getName()
           && fRunFlags == c.fRunFlags
           && fContextFlags == c.fContextFlags
           && fUserDataSize == c.fUserDataSize
           && fResultType == c.fResultType
           && fResultscale == c.fResultscale
           && fResultPrecision == c.fResultPrecision
           && fRowsInPartition == c.fRowsInPartition
           && fStartFrame == c.fStartFrame
           && fEndFrame == c.fEndFrame
           && fStartConstant == c.fStartConstant
           && fEndConstant == c.fEndConstant;
}
/**
 * Exact negation of operator==.
 */
bool mcsv1Context::operator!=(const mcsv1Context& c) const
{
    const bool same = (*this == c);
    return !same;
}
const std::string mcsv1Context::toString() const
{
std::ostringstream output;
output << "mcsv1Context: " << getName() << std::endl;
output << " RunFlags=" << fRunFlags << " ContextFlags=" << fContextFlags << std::endl;
output << " UserDataSize=" << fUserDataSize << " ResultType=" << colDataTypeToString(fResultType) << std::endl;
output << " Resultscale=" << fResultscale << " ResultPrecision=" << fResultPrecision << std::endl;
output << " ErrorMsg=" << errorMsg << std::endl;
output << " bInterrupted=" << bInterrupted << " RowsInPartition=" << fRowsInPartition << std::endl;
output << " StartFrame=" << fStartFrame << " EndFrame=" << fEndFrame << std::endl;
output << " StartConstant=" << fStartConstant << " EndConstant=" << fEndConstant << std::endl;
return output.str();
}
/**
 * Return the function object for this context, looking it up by
 * functionName in the UDAF map on first use and caching it in func.
 *
 * Throws std::logic_error when functionName is empty or has no
 * entry in the map.
 */
mcsv1sdk::mcsv1_UDAF* mcsv1Context::getFunction()
{
    if (!func)
    {
        // Just in time initialization
        if (functionName.empty())
        {
            std::ostringstream errmsg;
            errmsg << "mcsv1Context::getFunction: " << functionName << " is empty";
            throw std::logic_error(errmsg.str());
        }

        mcsv1sdk::UDAF_MAP& registry = mcsv1sdk::UDAFMap::getMap();
        mcsv1sdk::UDAF_MAP::iterator entry = registry.find(functionName);

        if (entry == registry.end())
        {
            std::ostringstream errmsg;
            errmsg << "mcsv1Context::getFunction: " << functionName << " is undefined";
            throw std::logic_error(errmsg.str());
        }

        func = entry->second;
    }

    return func;
}
/**
 * Const overload: forwards to the non-const getFunction(), which may
 * cache the looked-up function object in this context.
 */
mcsv1sdk::mcsv1_UDAF* mcsv1Context::getFunction() const
{
    mcsv1Context* self = const_cast<mcsv1Context*>(this);
    return self->getFunction();
}
void mcsv1Context::createUserData()
{
// Try the function. If not implemented, create a byte array.
UserData* userData = NULL;
mcsv1_UDAF::ReturnCode rc = getFunction()->createUserData(userData, fUserDataSize);
if (rc == mcsv1_UDAF::ERROR)
{
std::ostringstream errmsg;
errmsg << "mcsv1Context::createUserData: " << functionName << errorMsg.c_str();
throw std::logic_error(errmsg.str());
}
setUserData(userData);
}
// Write this context to the ByteStream. After the MCSV1_CONTEXT object id,
// the fields are streamed in exactly the order unserialize() reads them
// back, so any change here must be mirrored there.
void mcsv1Context::serialize(messageqcpp::ByteStream& b) const
{
b.needAtLeast(sizeof(mcsv1Context));
b << (ObjectReader::id_t) ObjectReader::MCSV1_CONTEXT;
b << functionName;
b << fRunFlags;
// Don't send context flags. These are set for each call
b << fUserDataSize;
// Enum values travel as uint32_t
b << (uint32_t)fResultType;
b << fResultscale;
b << fResultPrecision;
b << errorMsg;
// Don't send dataflags. These are set for each call
// bInterrupted is set internally.
b << fRowsInPartition;
b << (uint32_t)fStartFrame;
b << (uint32_t)fEndFrame;
b << fStartConstant;
b << fEndConstant;
}
/**
 * Restore this context from a ByteStream written by serialize().
 *
 * The stream is first checked for the MCSV1_CONTEXT object type;
 * the fields are then read in the order serialize() wrote them.
 * Enum fields arrive as uint32_t and are converted back.
 */
void mcsv1Context::unserialize(messageqcpp::ByteStream& b)
{
    ObjectReader::checkType(b, ObjectReader::MCSV1_CONTEXT);

    b >> functionName;
    b >> fRunFlags;
    b >> fUserDataSize;

    uint32_t rawResultType;
    b >> rawResultType;
    fResultType = static_cast<CalpontSystemCatalog::ColDataType>(rawResultType);

    b >> fResultscale;
    b >> fResultPrecision;
    b >> errorMsg;
    b >> fRowsInPartition;

    uint32_t rawFrame;
    b >> rawFrame;
    fStartFrame = static_cast<WF_FRAME>(rawFrame);
    b >> rawFrame;
    fEndFrame = static_cast<WF_FRAME>(rawFrame);

    b >> fStartConstant;
    b >> fEndConstant;
}
// Default serialization: write the byte count, then append that many bytes
// from the data buffer. UserData::unserialize() reads them back in the
// same order.
void UserData::serialize(messageqcpp::ByteStream& bs) const
{
bs << size;
bs.append(data, size);
}
/**
 * Default unserialization: read the byte count written by
 * serialize(), make sure the data buffer can hold that many bytes,
 * then copy them out of the stream and advance past them.
 *
 * The buffer is (re)allocated when none exists yet, as after the
 * default constructor, or when the incoming size differs from the
 * current one. Previously the bytes were copied into whatever data
 * pointed at, which wrote through NULL or past the end of a smaller
 * buffer.
 */
void UserData::unserialize(messageqcpp::ByteStream& bs)
{
    uint32_t incoming = 0;
    bs >> incoming;

    if (data == NULL || incoming != size)
    {
        delete [] data;
        // Keep the object consistent should the allocation below throw.
        data = NULL;
        size = 0;
        data = new uint8_t[incoming];
    }

    size = incoming;

    if (size > 0)
    {
        memcpy(data, bs.buf(), size);
        bs.advance(size);
    }
}
// Empty string used to initialise strTypeId below.
const std::string typeStr("");
// Definitions of mcsv1_UDAF's static type-id members. Each one is a
// static_any::any reference initialised from a value of one distinct C++
// type; only the type differs between them, the value 1 is a placeholder.
// NOTE(review): presumably these are compared against a result's
// static_any::any to identify the type it holds -- confirm against callers.
const static_any::any& mcsv1_UDAF::charTypeId = (char)1;
const static_any::any& mcsv1_UDAF::scharTypeId = (signed char)1;
const static_any::any& mcsv1_UDAF::shortTypeId = (short)1;
const static_any::any& mcsv1_UDAF::intTypeId = (int)1;
const static_any::any& mcsv1_UDAF::longTypeId = (long)1;
const static_any::any& mcsv1_UDAF::llTypeId = (long long)1;
const static_any::any& mcsv1_UDAF::ucharTypeId = (unsigned char)1;
const static_any::any& mcsv1_UDAF::ushortTypeId = (unsigned short)1;
const static_any::any& mcsv1_UDAF::uintTypeId = (unsigned int)1;
const static_any::any& mcsv1_UDAF::ulongTypeId = (unsigned long)1;
const static_any::any& mcsv1_UDAF::ullTypeId = (unsigned long long)1;
const static_any::any& mcsv1_UDAF::floatTypeId = (float)1;
const static_any::any& mcsv1_UDAF::doubleTypeId = (double)1;
const static_any::any& mcsv1_UDAF::strTypeId = typeStr;

990
utils/udfsdk/mcsv1_udaf.h Executable file
View File

@ -0,0 +1,990 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* mcsv1_UDAF.h
***********************************************************************/
/**
* Columnstore interface for writing a User Defined Aggregate
* Functions (UDAF) and User Defined Analytic Functions (UDAnF)
* or a function that can act as either - UDA(n)F
*
* The basic steps are:
*
* 1. Create the UDA(n)F function interface in some .h file.
* 2. Create the UDA(n)F function implementation in some .cpp file.
* 3. Create the connector stub (MariaDB UDAF definition) for
* this UDA(n)F function.
* 4. Build the dynamic library using all of the source.
* 5. Put the library in $COLUMNSTORE_INSTALL/lib of
* all modules.
* 6. Restart the Columnstore system.
* 7. Notify mysqld about the new functions with commands like:
*
* // An example of xor over a range for UDAF and UDAnF
* CREATE AGGREGATE FUNCTION mcs_bit_xor returns BOOL soname
* 'libudfsdk.so';
*
* // An example that only makes sense as a UDAnF
* CREATE AGGREGATE FUNCTION mcs_interpolate returns REAL
* soname 'libudfsdk.so';
*
* The UDAF functions may run distributed in the Columnstore
* engine. UDAnF do not run distributed.
*
* UDAF is User Defined Aggregate Function.
* UDAnF is User Defined Analytic Function.
* UDA(n)F is an acronym for a function that could be either. It
* is also used to describe the interface that is used for
* either.
*/
#ifndef HEADER_mcsv1_udaf
#define HEADER_mcsv1_udaf
#include <cstdlib>
#include <string>
#include <vector>
#include <map>
#include <boost/shared_ptr.hpp>
#include <boost/any.hpp>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "any.hpp"
#include "calpontsystemcatalog.h"
#include "wf_frame.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{
/**
* A map from name to function object.
*
* This is temporary until we get the library loading task
* complete
*
* TODO: Remove when library loading is enabled.
*/
class mcsv1_UDAF;
typedef std::tr1::unordered_map<std::string, mcsv1_UDAF*> UDAF_MAP;
// Holder for the static map from function name to UDA(n)F function object.
class UDAFMap
{
public:
EXPORT UDAFMap(){};
EXPORT ~UDAFMap(){};
// Returns the shared map. The implementation in mcsv1_udaf.cpp registers
// the known functions the first time the map is found empty.
static EXPORT UDAF_MAP& getMap();
private:
// The map handed out by getMap().
static UDAF_MAP fm;
};
/**
* A class to hold your user data
*
* If your UDAF only needs a fixed sized data struct, you need
* do nothing with this. Call setUserDataSize in your init
* function with the required size and the framework will take
* care of it.
*
* If you need something more or just want to control things,
* then override UserData with your data structure and
* implement createUserData in your function object to create
* your data structure. Your UserData destuctor should take care
* of any cleanup you may need (Simple containers clean
* themselves up).
*/
class mcsv1Context;
// Default container for a UDA(n)F's per-aggregation data: a byte buffer and
// its length. Derive from it when a function needs richer state.
struct UserData
{
    // No buffer yet.
    UserData() : size(0), data(NULL) {};
    // Allocate a buffer of sz bytes. The contents are not initialised.
    UserData(size_t sz) : size(sz), data(new uint8_t[sz]) {}
    // delete [] accepts a null pointer, so no test is needed.
    virtual ~UserData() { delete [] data; }
    /**
     * serialize()
     *
     * User data is passed between processes. In order to do so, it
     * must be serialized. Since user data can have sub objects,
     * containers and the like, it is up to the UDAF to provide the
     * serialize function. The streaming functionality of
     * messageqcpp::ByteStream must be used.
     *
     * The default streams the size and data buffer to the
     * ByteStream.
     */
    virtual void serialize(messageqcpp::ByteStream& bs) const;
    /**
     * unserialize()
     *
     * User data is passed between processes. In order to do so, it
     * must be unserialized. Since user data can have sub objects,
     * containers and the like, it is up to the UDAF to provide the
     * unserialize function. The streaming functionality of
     * messageqcpp::ByteStream must be used.
     *
     * data is the datablock returned by createUserData.
     *
     * The default creates the data array and streams into data.
     */
    virtual void unserialize(messageqcpp::ByteStream& bs);
    // The default data store. You may or may not wish to use these fields.
    uint32_t size;
    uint8_t* data;
private:
    // For now, copying is unwanted. Both copy operations are declared
    // private and left undefined: a compiler-generated assignment would
    // copy the data pointer and lead to the buffer being deleted twice.
    UserData(UserData&);
    UserData& operator=(const UserData&);
};
// Flags to define the type and limitations of a UDA(n)F
// Used in context->fRunFlags
// Each flag occupies its own bit, so several can be ORed together.
static uint64_t UDAF_OVER_REQUIRED __attribute__ ((unused)) = 1; // May only be used as UDAnF
static uint64_t UDAF_OVER_ALLOWED __attribute__ ((unused)) = 1 << 1; // May be used as UDAF or UDAnF
static uint64_t UDAF_ORDER_REQUIRED __attribute__ ((unused)) = 1 << 2; // If used as UDAnF, ORDER BY is required
static uint64_t UDAF_ORDER_ALLOWED __attribute__ ((unused)) = 1 << 3; // If used as UDAnF, ORDER BY is optional
static uint64_t UDAF_WINDOWFRAME_REQUIRED __attribute__ ((unused)) = 1 << 4; // If used as UDAnF, a WINDOW FRAME is required
static uint64_t UDAF_WINDOWFRAME_ALLOWED __attribute__ ((unused)) = 1 << 5; // If used as UDAnF, a WINDOW FRAME is optional
static uint64_t UDAF_MAYBE_NULL __attribute__ ((unused)) = 1 << 6; // If UDA(n)F might return NULL.
static uint64_t UDAF_IGNORE_NULLS __attribute__ ((unused)) = 1 << 7; // If UDA(n)F wants NULL rows suppressed.
// Flags set by the framework to define the context of the call.
// User code shouldn't use these directly
// Used in context->fContextFlags
static uint64_t CONTEXT_IS_ANALYTIC __attribute__ ((unused)) = 1; // If called using OVER
static uint64_t CONTEXT_HAS_CURRENT_ROW __attribute__ ((unused)) = 1 << 1; // The current window contains the current row.
static uint64_t CONTEXT_IS_PM __attribute__ ((unused)) = 1 << 2; // The call was made by the PM
// Flags that describe the contents of a specific input parameter
// These will be set in context->dataFlags for each method call by the framework.
// User code shouldn't use these directly
static uint64_t PARAM_IS_NULL __attribute__ ((unused)) = 1; // The parameter is NULL
static uint64_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; // The parameter is a constant
// shorthand for the list of columns in the call sent to init()
// first is the actual column name and second is the data type in Columnstore.
typedef std::vector<std::pair<std::string, CalpontSystemCatalog::ColDataType> >COL_TYPES;
// This is the context class that is passed to all API callbacks
// The framework potentially sets data here for each invocation of
// mcsv1_UDAF methods. Access methods are given for data useful to UDA(n)F.
// Don't modify anything directly except data retrieved with getUserData().
// UDA(n)F devlopers should not modify this class. The framework and other UDA(n)F
// rely on it being as it was when they were compiled.
//
// It's probable that future versions of Columnstore will add functionality to
// the context. UDA(n)F may need to be re-compiled in this case.
class mcsv1Context
{
public:
EXPORT mcsv1Context();
EXPORT mcsv1Context(const mcsv1Context& rhs);
// The destructor is virtual only in case a version 2 is made derived from v1
// to promote backward compatibility.
// mcsv1Context should never be subclassed by UDA(n)F developers
EXPORT virtual ~mcsv1Context();
// Set an error message if something goes wrong
EXPORT void setErrorMessage(std::string errmsg);
// Get the previously set error message
EXPORT const std::string& getErrorMessage() const;
// Set the flags as a set. Return the previous flags.
EXPORT uint64_t setRunFlags(uint64_t flags);
// return the flags
EXPORT uint64_t getRunFlags() const;
// The following set, get, clear and toggle methods can be used to manipulate
// multiple flags by ORing them together in the call sequence.
// Ex setRunFlag(UDAF_OVER_REQUIRED | UDAF_ORDER_REQUIRED);
// sets both flags and returns true if BOTH flags are already set.
//
// Set a specific flag and return its previous setting
EXPORT bool setRunFlag(uint64_t flag);
// Get a specific flag
EXPORT bool getRunFlag(uint64_t flag);
// clear a specific flag and return its previous setting
EXPORT bool clearRunFlag(uint64_t flag);
// toggle a specific flag and return its previous setting
EXPORT bool toggleRunFlag(uint64_t flag);
// Use these to determine the way your UDA(n)F was called
// Valid in all method calls
EXPORT bool isAnalytic();
EXPORT bool isWindowHasCurrentRow();
// Determine if the call is made by the UM
// This could be because the UDA(n)F is not being distributed
// Or it could be during setup or during consolidation of PM values.
// valid in all calls
EXPORT bool isUM();
// Determine if the call is made by the PM
// This will be during partial aggregation performed on the PM
// valid in all calls
EXPORT bool isPM();
// Parameter refinement description accessors
// valid in nextValue, dropValue and evaluateCumulative
size_t getParameterCount() const;
// Determine if an input parameter is NULL
// valid in nextValue, dropValue and evaluateCumulative
EXPORT bool isParamNull(int paramIdx);
// If a parameter is a constant, the UDA(n)F could presumably optimize its workings.
// During the first call to nextValue() or evaluateCumulative().
// Is there a better way to determine this?
// valid in nextValue, dropValue and evaluateCumulative
EXPORT bool isParamConstant(int paramIdx);
// For getting the result type.
EXPORT CalpontSystemCatalog::ColDataType getResultType() const;
// For getting the decimal characteristics for the return value.
// These will be set to the default before init().
EXPORT int32_t getScale() const;
EXPORT int32_t getPrecision() const;
// If you want to change the result type
// valid in init()
EXPORT bool setResultType(CalpontSystemCatalog::ColDataType resultType);
// For setting the decimal characteristics for the return value.
// This only makes sense if the return type is decimal, but should be set
// to (0, -1) for other types if the input is decimal.
// valid in init()
EXPORT bool setScale(int32_t scale);
EXPORT bool setPrecision(int32_t precision);
// For all types, get the return column width in bytes. Ex. INT will return 4.
EXPORT int32_t getColWidth();
// For non-numeric return types, set the return column width. This defaults
// to the length of the input.
// valid in init()
EXPORT bool setColWidth(int32_t colWidth);
// If a method is known to take a while, call this periodically to see if something
// interrupted the processing. If getInterrupted() returns true, then the executing
// method should clean up and exit.
EXPORT bool getInterrupted() const;
// Returns the actual number of rows in the partition. If no partitioning, returns 0.
// valid in reset()
EXPORT uint64_t getRowsInPartition() const;
// Returns the number of rows in the aggregate. This could be the total number of rows,
// the number of rows in the group, or the number of rows in the PM's subaggregate,
// depending on the context it was called.
// valid in subEvaluate() and evaluate().
EXPORT uint64_t getRowCnt() const;
// Allocate instance specific memory. This should be type cast to a structure overlay
// defined by the function. The actual allocation occurs in the various modules that
// do the aggregation. If the UDAF is being calculated in a distributed fashion, then
// multiple instances of this data may be allocated. Calls to the subaggregate functions
// do not share a context.
// You do not need to worry about freeing this memory. The framework handles all management.
// Call this during init()
EXPORT void setUserDataSize(int bytes);
// Call this everywhere except init()
EXPORT UserData* getUserData();
// Many UDAnF need a default Window Frame. If none is set here, the default is
// UNBOUNDED PRECEDING to CURRENT ROW.
// It's possible to not allow the WINDOW FRAME phrase in the UDAnF by setting
// the UDAF_WINDOWFRAME_REQUIRED and UDAF_WINDOWFRAME_ALLOWED both to false. Columnstore
// requires a Window Frame in order to process UDAnF. In this case, the default will
// be used for all calls.
// Possible values for start frame are
// WF_UNBOUNDED_PRECEDING, WF_CURRENT_ROW, WF_PRECEDING or WF_FOLLOWING
// possible values for end frame are
// WF_CURRENT_ROW, WF_UNBOUNDED_FOLLOWING, WF_PRECEDING or WF_FOLLOWING
// If WF_PRECEDING and/or WF_FOLLOWING, a start or end constant should
// be included to say how many preceding or following is the default
// Set this during init()
EXPORT bool setDefaultWindowFrame(WF_FRAME defaultStartFrame,
WF_FRAME defaultEndFrame,
int32_t startConstant = 0, // For WF_PRECEEDING or WF_FOLLOWING
int32_t endConstant = 0); // For WF_PRECEEDING or WF_FOLLOWING
// There may be times you want to know the actual frame set by the caller
EXPORT void getStartFrame(WF_FRAME& startFrame, int32_t& startConstant) const;
EXPORT void getEndFrame(WF_FRAME& endFrame, int32_t& endConstant) const;
// Deep Equivalence
bool operator==(const mcsv1Context& c) const;
bool operator!=(const mcsv1Context& c) const;
// stream operator for debugging
EXPORT const std::string toString() const;
// Get the name of the function
EXPORT const std::string& getName() const;
EXPORT mcsv1Context& operator=(const mcsv1Context& rhs);
EXPORT mcsv1Context& copy(const mcsv1Context& rhs);
private:
uint64_t fRunFlags; // Set by the user to define the type of UDA(n)F
uint64_t fContextFlags; // Set by the framework to define this specific call.
int32_t fUserDataSize;
boost::shared_ptr<UserData> fUserData;
CalpontSystemCatalog::ColDataType fResultType;
int32_t fColWidth; // The length in bytes of the return type
int32_t fResultscale; // For scale, the number of digits to the right of the decimal
int32_t fResultPrecision; // The max number of digits allowed in the decimal value
std::string errorMsg;
std::vector<uint32_t>* dataFlags; // one entry for each parameter
bool* bInterrupted; // Gets set to true by the Framework if something happens
uint64_t fRowsInPartition; // Only valid in reset()
int64_t fRowCnt; // The number of rows involved in this aggregate.
WF_FRAME fStartFrame; // Is set to default to start, then modified by the actual frame in the call
WF_FRAME fEndFrame; // Is set to default to start, then modified by the actual frame in the call
int32_t fStartConstant; // for start frame WF_PRECEEDIMG or WF_FOLLOWING
int32_t fEndConstant; // for end frame WF_PRECEEDIMG or WF_FOLLOWING
std::string functionName;
mcsv1sdk::mcsv1_UDAF* func;
public:
// For use by the framework
EXPORT void serialize(messageqcpp::ByteStream& b) const;
EXPORT void unserialize(messageqcpp::ByteStream& b);
EXPORT void createUserData();
EXPORT void setUserData(boost::shared_ptr<UserData> userData);
EXPORT void setUserData(UserData* userData);
EXPORT void setName(std::string name);
EXPORT void setContextFlags(uint64_t flags);
EXPORT void setContextFlag(uint64_t flag);
EXPORT void clearContextFlag(uint64_t flag);
EXPORT uint64_t getContextFlags() const;
EXPORT uint32_t getUserDataSize() const;
EXPORT std::vector<uint32_t>& getDataFlags();
EXPORT void setDataFlags(std::vector<uint32_t>* flags);
EXPORT void setInterrupted(bool interrupted);
EXPORT void setInterrupted(bool* interrupted);
EXPORT void setRowCnt(uint64_t cnt);
EXPORT mcsv1sdk::mcsv1_UDAF* getFunction();
EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const;
EXPORT boost::shared_ptr<UserData> getUserDataSP();
};
// Since aggregate functions can operate on any data type, we use the following structure
// to define the input row data. To be type insensitive, data is stored in type static_any::any.
//
// To access the data it must be converted to the correct type using the cast<T>()
// member of static_any::any.
// example for int data:
//
// if (dataType == CalpontSystemCatalog::INT)
// int myint = columnData.cast<int>();
//
// For multi-parameter aggregations, the valsIn vector of nextValue()
// contains the ordered set of row parameters.
//
// For char, varchar, text, varbinary and blob types, columnData will be std::string.
struct ColumnDatum
{
CalpontSystemCatalog::ColDataType dataType; // defined in calpontsystemcatalog.h
static_any::any columnData; // The value for this parameter
uint32_t scale; // If dataType is a DECIMAL type
uint32_t precision; // If dataType is a DECIMAL type
// Defaults: UNDEFINED data type and scale 0. precision is initialized from -1,
// which converts to the largest uint32_t value because the member is unsigned.
ColumnDatum() : dataType(CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1){};
};
// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or
// User Defined Analytic Function (UDAnF).
// These will be singleton classes, so don't put any instance
// specific data in here. All instance data is stored in mcsv1Context
// passed to each user function and retrieved by the getUserData() method.
//
// Each API function returns a ReturnCode. If ERROR is returned at any time,
// the query is aborted, getInterrupted() will begin to return true and the
// message set in context->setErrorMessage() is returned to MariaDB.
class mcsv1_UDAF
{
public:
// Result of every API call made on a UDA(n)F.
enum ReturnCode
{
ERROR = 0,
SUCCESS = 1,
NOT_IMPLEMENTED = 2 // User UDA(n)F shouldn't return this
};
// Defaults OK
mcsv1_UDAF(){};
virtual ~mcsv1_UDAF(){};
/**
* init()
*
* Mandatory. Implement this to initialize flags and instance
* data. Called once per SQL statement. You can do any sanity
* checks here.
*
* colTypes (in) - A vector of ColDataType defining the
* parameters of the UDA(n)F call. These can be used to decide
* to override the default return type. If desired, the new
* return type can be set by context->setResultType() and
* decimal scale and precision can be set by context->setScale
* and context->setPrecision respectively.
*
* Return mcsv1_UDAF::ERROR on any error, such as non-compatible
* colTypes or wrong number of arguments. Else return
* mcsv1_UDAF::SUCCESS.
*/
virtual ReturnCode init(mcsv1Context* context,
COL_TYPES& colTypes) = 0;
/**
* finish()
*
* Mandatory. Completes the UDA(n)F. Called once per SQL
* statement. Do not free any memory allocated by
* createUserData(). The SDK Framework owns that memory
* and will handle that. Often, there is nothing to do here.
*/
virtual ReturnCode finish(mcsv1Context* context) = 0;
/**
* reset()
*
* Mandatory. Reset the UDA(n)F for a new group, partition or,
* in some cases, new Window Frame. Do not free any memory
* allocated by createUserData(). The SDK Framework owns
* that memory and will handle that. Use this opportunity to
* reset any variables in context->getUserData() needed for the
* next aggregation. May be called multiple times if running in
* a distributed fashion.
*
* Use this opportunity to initialize the userData.
*/
virtual ReturnCode reset(mcsv1Context* context) = 0;
/**
* nextValue()
*
* Mandatory. Handle a single row.
*
* This function is called once for every row in the filtered
* result set (before aggregation). It is very important that
* this function is efficient.
*
* If the UDAF is running in a distributed fashion, nextValue
* cannot depend on order, as it will only be called for each
* row found on the specific PM.
*
* valsIn (in) - a vector of ColumnDatum describing the
* parameters from the row, in parameter order.
*/
virtual ReturnCode nextValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsIn) = 0;
/**
* subEvaluate()
*
* Mandatory -- Called if the UDAF is running in a distributed
* fashion. Columnstore tries to run all aggregate functions
* distributed, depending on context.
*
* Perform an aggregation on rows partially aggregated by
* nextValue. Columnstore calls nextValue for each row on a
* given PM for a group (GROUP BY). subEvaluate is called on the
* UM to consolidate those values into a single instance of
* userData. Keep your aggregated totals in context's userData.
* The first time this is called for a group, reset() would have
* been called with this version of userData.
*
* Called for every partial data set in each group in GROUP BY.
*
* When subEvaluate has been called for all subAggregated data
* sets, evaluate will be called with the same context as here.
*
* userDataIn (in) - This is a pointer to a UserData class with
* the partially aggregated values. It will contain the value of
* userData as seen in the last call to nextValue for a given
* PM.
*
*/
virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn) = 0;
/**
* evaluate()
*
* Mandatory. Get the aggregated value.
*
* Called for every new group if UDAF GROUP BY, UDAnF partition
* or, in some cases, new Window Frame.
*
* Set the aggregated value into valOut. The datatype is assumed
* to be the same as that set in the init() function;
*
* If the UDAF is running in a distributed fashion, evaluate is
* called after a series of subEvaluate calls.
*
* valOut (out) - Set the aggregated value here. The datatype is
* assumed to be the same as that set in the init() function;
*
* To return a NULL value, don't assign to valOut.
*/
virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut) = 0;
/**
* dropValue()
*
* Optional -- If defined, the server will call this instead of
* reset for UDAnF. The default implementation returns
* NOT_IMPLEMENTED.
*
* Don't implement if a UDAnF has one or more of the following:
* The UDAnF can't be used with a Window Frame
* The UDAnF is not reversible in some way
* The UDAnF is not interested in optimal performance
*
* If not implemented, reset() followed by a series of
* nextValue() will be called for each movement of the Window
* Frame.
*
* If implemented, then each movement of the Window Frame will
* result in dropValue() being called for each row falling out
* of the Frame and nextValue() being called for each new row
* coming into the Frame.
*
* valsDropped (in) - a vector of the parameters from the row
* leaving the Frame
*
* dropValue() will not be called for unbounded/current row type
* frames, as those are already optimized.
*/
virtual ReturnCode dropValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsDropped);
/**
* createUserData()
*
* Optional -- The default is to create a data byte array of
* size as set in context->setUserDataSize()
*
* Create your variable length data structure via
* userData = new <UserData_type>
*
* The data structure may contain references to containers or
* pointers to other objects. Remember that for distributed
* processing, this may be called multiple times for various
* computing blocks. At the least, it will be called once per PM
* that processes the data, and once more for the UM. For UDAnF,
* it may only be called once.
*
* Set length to the base length of the data structure you
* create.
*
*/
virtual ReturnCode createUserData(UserData*& userdata, int32_t& length);
protected:
// These are handy for testing the actual type of static_any,
// e.g. valIn.compatible(intTypeId) before calling valIn.cast<int>().
static const static_any::any& charTypeId;
static const static_any::any& scharTypeId;
static const static_any::any& shortTypeId;
static const static_any::any& intTypeId;
static const static_any::any& longTypeId;
static const static_any::any& llTypeId;
static const static_any::any& ucharTypeId;
static const static_any::any& ushortTypeId;
static const static_any::any& uintTypeId;
static const static_any::any& ulongTypeId;
static const static_any::any& ullTypeId;
static const static_any::any& floatTypeId;
static const static_any::any& doubleTypeId;
static const static_any::any& strTypeId;
};
/***********************************************************************
* There is no user modifiable code past this point
***********************************************************************/
// Function definitions for mcsv1Context
// Default constructor.
// Starts with OVER, ORDER BY and WINDOW FRAME all allowed, no context flags, no
// user data, an UNDEFINED result type and the default window frame of
// UNBOUNDED PRECEDING to CURRENT ROW. Every scalar member is given a defined
// value, including fRowCnt, so getRowCnt() never reads an indeterminate value.
// The initializer list follows the member declaration order.
inline mcsv1Context::mcsv1Context() :
    fRunFlags(UDAF_OVER_ALLOWED | UDAF_ORDER_ALLOWED | UDAF_WINDOWFRAME_ALLOWED),
    fContextFlags(0),
    fUserDataSize(0),
    fResultType(CalpontSystemCatalog::UNDEFINED),
    fColWidth(0),
    fResultscale(0),
    fResultPrecision(18),
    dataFlags(NULL),
    bInterrupted(NULL),
    fRowsInPartition(0),
    fRowCnt(0),
    fStartFrame(WF_UNBOUNDED_PRECEDING),
    fEndFrame(WF_CURRENT_ROW),
    fStartConstant(0),
    fEndConstant(0),
    func(NULL)
{
}
// Copy constructor.
// Per-call state (context flags, column width, data flags, function pointer) is
// reset rather than copied; the descriptive settings are then taken from rhs by
// copy(). fRowsInPartition and fRowCnt are not touched by copy(), so they are
// zeroed here to keep the new object free of indeterminate values.
inline mcsv1Context::mcsv1Context(const mcsv1Context& rhs) :
    fContextFlags(0),
    fColWidth(0),
    dataFlags(NULL),
    bInterrupted(NULL),
    fRowsInPartition(0),
    fRowCnt(0),
    func(NULL)
{
    copy(rhs);
}
// Copy the descriptive settings of rhs into this context: run flags, result
// type, user data size, scale, precision, window frame, function name and the
// interrupted-flag pointer. Other members are left untouched.
inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs)
{
    // Identity of the function and how it may be called
    functionName = rhs.getName();
    fRunFlags = rhs.getRunFlags();

    // Result description
    fResultType = rhs.getResultType();
    fResultscale = rhs.getScale();
    fResultPrecision = rhs.getPrecision();
    fUserDataSize = rhs.getUserDataSize();

    // Window frame
    rhs.getStartFrame(fStartFrame, fStartConstant);
    rhs.getEndFrame(fEndFrame, fEndConstant);

    // Multiple threads will use the same reference
    bInterrupted = rhs.bInterrupted;

    return *this;
}
// Destructor. Nothing is released explicitly here.
inline mcsv1Context::~mcsv1Context()
{
    // intentionally empty
}
// Assignment. Per-call state is reset first, then the descriptive settings are
// taken from rhs through copy().
inline mcsv1Context& mcsv1Context::operator=(const mcsv1Context& rhs)
{
    func = NULL;
    bInterrupted = NULL;
    dataFlags = NULL;
    fColWidth = 0;
    fContextFlags = 0;

    return this->copy(rhs);
}
inline void mcsv1Context::setErrorMessage(std::string errmsg)
{
errorMsg = errmsg;
}
inline const std::string& mcsv1Context::getErrorMessage() const
{
return errorMsg;
}
// Replace the whole run-flag word. Returns the flags that were set before.
inline uint64_t mcsv1Context::setRunFlags(uint64_t flags)
{
    const uint64_t previous = fRunFlags;
    fRunFlags = flags;
    return previous;
}
inline uint64_t mcsv1Context::getRunFlags() const
{
return fRunFlags;
}
// Turn on the given run flag bit(s). Returns true if any of them was already on.
inline bool mcsv1Context::setRunFlag(uint64_t flag)
{
    const bool wasSet = (fRunFlags & flag) != 0;
    fRunFlags = fRunFlags | flag;
    return wasSet;
}
// Return true if any of the given run flag bit(s) is on.
inline bool mcsv1Context::getRunFlag(uint64_t flag)
{
    return (fRunFlags & flag) != 0;
}
// Turn off the given run flag bit(s). Returns true if any of them was on before.
inline bool mcsv1Context::clearRunFlag(uint64_t flag)
{
    const bool wasSet = (fRunFlags & flag) != 0;
    fRunFlags = fRunFlags & ~flag;
    return wasSet;
}
// Invert the given run flag bit(s). Returns true if any of them was on before.
inline bool mcsv1Context::toggleRunFlag(uint64_t flag)
{
    const bool wasSet = (fRunFlags & flag) != 0;
    fRunFlags = fRunFlags ^ flag;
    return wasSet;
}
// True when CONTEXT_IS_ANALYTIC is set in the context flags.
inline bool mcsv1Context::isAnalytic()
{
    return (fContextFlags & CONTEXT_IS_ANALYTIC) != 0;
}
// True when CONTEXT_HAS_CURRENT_ROW is set in the context flags.
inline bool mcsv1Context::isWindowHasCurrentRow()
{
    return (fContextFlags & CONTEXT_HAS_CURRENT_ROW) != 0;
}
// True when CONTEXT_IS_PM is NOT set in the context flags.
inline bool mcsv1Context::isUM()
{
    return (fContextFlags & CONTEXT_IS_PM) == 0;
}
// True when CONTEXT_IS_PM is set in the context flags.
inline bool mcsv1Context::isPM()
{
    return (fContextFlags & CONTEXT_IS_PM) != 0;
}
// Number of entries in the data-flags vector (one per parameter), or 0 when no
// data-flags vector has been attached.
inline size_t mcsv1Context::getParameterCount() const
{
    return dataFlags ? dataFlags->size() : 0;
}
// True when PARAM_IS_NULL is set for parameter paramIdx. Returns false when no
// data-flags vector has been attached. paramIdx is not range checked.
inline bool mcsv1Context::isParamNull(int paramIdx)
{
    if (!dataFlags)
    {
        return false;
    }

    return ((*dataFlags)[paramIdx] & PARAM_IS_NULL) != 0;
}
// True when PARAM_IS_CONSTANT is set for parameter paramIdx. Returns false when
// no data-flags vector has been attached. paramIdx is not range checked.
inline bool mcsv1Context::isParamConstant(int paramIdx)
{
    if (!dataFlags)
    {
        return false;
    }

    return ((*dataFlags)[paramIdx] & PARAM_IS_CONSTANT) != 0;
}
// Return the result data type currently set on this context.
inline CalpontSystemCatalog::ColDataType mcsv1Context::getResultType() const
{
    return this->fResultType;
}
// Set the result data type. Always returns true; no validation is done yet.
inline bool mcsv1Context::setResultType(CalpontSystemCatalog::ColDataType resultType)
{
    this->fResultType = resultType;
    return true; // We may want to sanity check here.
}
inline int32_t mcsv1Context::getScale() const
{
return fResultscale;
}
inline int32_t mcsv1Context::getPrecision() const
{
return fResultPrecision;
}
inline bool mcsv1Context::setScale(int32_t scale)
{
fResultscale = scale;
return true;
}
inline bool mcsv1Context::setPrecision(int32_t precision)
{
fResultPrecision = precision;
return true;
}
inline bool mcsv1Context::setColWidth(int32_t colWidth)
{
fColWidth = colWidth;
return true;
}
// Write the interrupted state through the attached flag pointer. Does nothing
// when no flag has been attached.
inline void mcsv1Context::setInterrupted(bool interrupted)
{
    if (!bInterrupted)
    {
        return;
    }

    *bInterrupted = interrupted;
}
inline void mcsv1Context::setInterrupted(bool* interrupted)
{
bInterrupted = interrupted;
}
// Return the interrupted state.
// Reads the value of the attached flag (the same one setInterrupted(bool) writes
// through). Returns false when no flag has been attached.
// The pointer must be dereferenced: returning the pointer itself would convert
// to true for every non-null pointer, regardless of the flag's value.
inline bool mcsv1Context::getInterrupted() const
{
    if (bInterrupted)
    {
        return *bInterrupted;
    }

    return false;
}
inline uint64_t mcsv1Context::getRowsInPartition() const
{
return fRowsInPartition;
}
// Return the stored row count (kept as a signed member, returned unsigned).
inline uint64_t mcsv1Context::getRowCnt() const
{
    return static_cast<uint64_t>(fRowCnt);
}
inline void mcsv1Context::setUserDataSize(int bytes)
{
fUserDataSize = bytes;
}
// Return the user data as a raw pointer, creating it first through
// createUserData() if none is held yet.
inline UserData* mcsv1Context::getUserData()
{
    if (fUserData.get() == NULL)
    {
        createUserData();
    }

    return fUserData.get();
}
// Return the user data as a shared pointer, creating it first through
// createUserData() if none is held yet.
inline boost::shared_ptr<UserData> mcsv1Context::getUserDataSP()
{
    if (fUserData.get() == NULL)
    {
        createUserData();
    }

    return fUserData;
}
// Share ownership of the given user data with this context.
inline void mcsv1Context::setUserData(boost::shared_ptr<UserData> userData)
{
    this->fUserData = userData;
}
// Take ownership of a raw user data pointer. Passing NULL releases whatever
// user data this context currently holds.
inline void mcsv1Context::setUserData(UserData* userData)
{
    if (!userData)
    {
        fUserData.reset();
        return;
    }

    fUserData.reset(userData);
}
// Store the default window frame (start/end frame types and their constants).
// Always returns true; the values are not validated.
inline bool mcsv1Context::setDefaultWindowFrame(WF_FRAME defaultStartFrame,
                                                WF_FRAME defaultEndFrame,
                                                int32_t startConstant,
                                                int32_t endConstant)
{
    // TODO: Add sanity checks

    // Start of the frame
    fStartFrame = defaultStartFrame;
    fStartConstant = startConstant;

    // End of the frame
    fEndFrame = defaultEndFrame;
    fEndConstant = endConstant;

    return true;
}
// Report the start frame type and its constant through the out parameters.
inline void mcsv1Context::getStartFrame(WF_FRAME& startFrame, int32_t& startConstant) const
{
    startConstant = this->fStartConstant;
    startFrame = this->fStartFrame;
}
// Report the end frame type and its constant through the out parameters.
inline void mcsv1Context::getEndFrame(WF_FRAME& endFrame, int32_t& endConstant) const
{
    endConstant = this->fEndConstant;
    endFrame = this->fEndFrame;
}
inline const std::string& mcsv1Context::getName() const
{
return functionName;
}
inline void mcsv1Context::setName(std::string name)
{
functionName = name;
}
inline void mcsv1Context::setRowCnt(uint64_t cnt)
{
fRowCnt = cnt;
}
inline uint64_t mcsv1Context::getContextFlags() const
{
return fContextFlags;
}
inline void mcsv1Context::setContextFlags(uint64_t flags)
{
fContextFlags = flags;
}
// Turn on the given context flag bit(s).
inline void mcsv1Context::setContextFlag(uint64_t flag)
{
    fContextFlags = fContextFlags | flag;
}
// Turn off the given context flag bit(s).
inline void mcsv1Context::clearContextFlag(uint64_t flag)
{
    fContextFlags = fContextFlags & ~flag;
}
// Return the recorded user data size (kept signed, returned unsigned).
inline uint32_t mcsv1Context::getUserDataSize() const
{
    return static_cast<uint32_t>(fUserDataSize);
}
// Return the attached data-flags vector by reference. The pointer is
// dereferenced without a null check, so a vector must have been attached with
// setDataFlags() before this is called.
inline std::vector<uint32_t>& mcsv1Context::getDataFlags()
{
    std::vector<uint32_t>& flags = *dataFlags;
    return flags;
}
inline void mcsv1Context::setDataFlags(std::vector<uint32_t>* flags)
{
dataFlags = flags;
}
// Default dropValue(): reports NOT_IMPLEMENTED and ignores both arguments.
inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context,
                                                    std::vector<ColumnDatum>& valsDropped)
{
    return mcsv1_UDAF::NOT_IMPLEMENTED;
}
// Default createUserData(): allocates a plain UserData constructed with the
// requested length, records that length in its size member and hands the object
// back through userData. Always returns SUCCESS.
inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::createUserData(UserData*& userData, int32_t& length)
{
    UserData* created = new UserData(length);
    created->size = length;
    userData = created;
    return mcsv1_UDAF::SUCCESS;
}
}; // namespace mcssdk
#undef EXPORT
#endif // HEADER_mcsv1_udaf.h

314
utils/udfsdk/median.cpp Executable file
View File

@ -0,0 +1,314 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include <typeinfo>
#include "median.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
// Validate the call and describe the result.
// median() takes exactly one numeric argument; anything else sets an error
// message on the context and returns ERROR. On success the result is declared
// as an 8 byte DOUBLE and NULL input values are ignored.
mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context,
                                    COL_TYPES& colTypes)
{
    // Each error message below will be prepended with
    // "The storage engine for the table doesn't support "
    if (colTypes.size() < 1)
    {
        context->setErrorMessage("median() with 0 arguments");
        return mcsv1_UDAF::ERROR;
    }
    else if (colTypes.size() > 1)
    {
        context->setErrorMessage("median() with more than 1 argument");
        return mcsv1_UDAF::ERROR;
    }
    else if (!isNumeric(colTypes[0].second))
    {
        context->setErrorMessage("median() with non-numeric argument");
        return mcsv1_UDAF::ERROR;
    }

    // Result description
    context->setResultType(CalpontSystemCatalog::DOUBLE);
    context->setColWidth(8);
    context->setScale(context->getScale() * 2);
    context->setPrecision(19);

    // Behaviour flags
    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);

    return mcsv1_UDAF::SUCCESS;
}
// Nothing to clean up for median(); always succeeds.
mcsv1_UDAF::ReturnCode median::finish(mcsv1Context* context)
{
    const mcsv1_UDAF::ReturnCode rc = mcsv1_UDAF::SUCCESS;
    return rc;
}
// Start a new aggregation: forget every value counted so far.
mcsv1_UDAF::ReturnCode median::reset(mcsv1Context* context)
{
    UserData* userData = context->getUserData();
    static_cast<MedianData*>(userData)->mData.clear();
    return mcsv1_UDAF::SUCCESS;
}
// Count one input row.
// The single parameter is converted to DATATYPE according to the type actually
// held by the static_any, scaled down by 10^scale for decimal values, and its
// occurrence count in the value->count map is incremented. An empty input is
// skipped. Always returns SUCCESS.
mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context,
                                         std::vector<ColumnDatum>& valsIn)
{
    ColumnDatum& datum = valsIn[0];
    static_any::any& valIn = datum.columnData;
    MEDIAN_DATA& counts = static_cast<MedianData*>(context->getUserData())->mData;

    if (valIn.empty())
    {
        // Ought not happen when UDAF_IGNORE_NULLS is on.
        return mcsv1_UDAF::SUCCESS;
    }

    // Convert whatever integral or floating type was supplied. A type that
    // matches none of the tests leaves the value at 0.
    DATATYPE val = 0.0;

    if (valIn.compatible(charTypeId))         val = valIn.cast<char>();
    else if (valIn.compatible(scharTypeId))   val = valIn.cast<signed char>();
    else if (valIn.compatible(shortTypeId))   val = valIn.cast<short>();
    else if (valIn.compatible(intTypeId))     val = valIn.cast<int>();
    else if (valIn.compatible(longTypeId))    val = valIn.cast<long>();
    else if (valIn.compatible(llTypeId))      val = valIn.cast<long long>();
    else if (valIn.compatible(ucharTypeId))   val = valIn.cast<unsigned char>();
    else if (valIn.compatible(ushortTypeId))  val = valIn.cast<unsigned short>();
    else if (valIn.compatible(uintTypeId))    val = valIn.cast<unsigned int>();
    else if (valIn.compatible(ulongTypeId))   val = valIn.cast<unsigned long>();
    else if (valIn.compatible(ullTypeId))     val = valIn.cast<unsigned long long>();
    else if (valIn.compatible(floatTypeId))   val = valIn.cast<float>();
    else if (valIn.compatible(doubleTypeId))  val = valIn.cast<double>();

    // For decimal types, we need to move the decimal point.
    const uint32_t scale = datum.scale;

    if (scale > 0 && val != 0)
    {
        val /= pow(10.0, (double)scale);
    }

    ++counts[val];
    return mcsv1_UDAF::SUCCESS;
}
// Merge a partial aggregate into this context's user data by adding the
// per-value counts of userDataIn to the counts already held. A NULL userDataIn
// is ignored. Always returns SUCCESS.
mcsv1_UDAF::ReturnCode median::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
    if (userDataIn)
    {
        MEDIAN_DATA& merged = static_cast<MedianData*>(context->getUserData())->mData;
        const MEDIAN_DATA& partial = static_cast<const MedianData*>(userDataIn)->mData;

        MEDIAN_DATA::const_iterator entry = partial.begin();

        while (entry != partial.end())
        {
            merged[entry->first] += entry->second;
            ++entry;
        }
    }

    return mcsv1_UDAF::SUCCESS;
}
// Compute the median of the values counted so far.
//
// The user data maps each distinct value to its occurrence count, ordered by
// value. With N values in total, the median is the mean of the values at
// 0-based ranks (N-1)/2 and N/2 (the same value when N is odd).
//
// valOut (out) - receives the median as a DATATYPE. It is left unassigned,
// i.e. NULL is returned, when no values have been counted. Dereferencing
// begin()/rbegin() of an empty map would be undefined behaviour.
//
// Entries whose count is zero are skipped so they cannot influence the result.
// Always returns SUCCESS.
mcsv1_UDAF::ReturnCode median::evaluate(mcsv1Context* context, static_any::any& valOut)
{
    const MEDIAN_DATA& data = static_cast<MedianData*>(context->getUserData())->mData;

    // Total number of values in the aggregate.
    uint64_t total = 0;

    for (MEDIAN_DATA::const_iterator it = data.begin(); it != data.end(); ++it)
    {
        total += it->second;
    }

    if (total == 0)
    {
        // No data: don't assign to valOut so that NULL is returned.
        return mcsv1_UDAF::SUCCESS;
    }

    const uint64_t lowerRank = (total - 1) / 2;
    const uint64_t upperRank = total / 2;

    DATATYPE lower = 0;
    DATATYPE upper = 0;
    bool haveLower = false;
    uint64_t seen = 0; // number of values at or below the current key

    for (MEDIAN_DATA::const_iterator it = data.begin(); it != data.end(); ++it)
    {
        if (it->second == 0)
        {
            continue;
        }

        seen += it->second;

        if (!haveLower && seen > lowerRank)
        {
            lower = it->first;
            haveLower = true;
        }

        if (seen > upperRank)
        {
            upper = it->first;
            break;
        }
    }

    valOut = (lower + upper) / 2;
    return mcsv1_UDAF::SUCCESS;
}
// Remove one row that has left the window frame.
//
// The value is converted and scaled exactly as in nextValue(), then its
// occurrence count is decremented. When the count reaches zero the entry is
// erased, so the map only ever holds values that are really in the frame.
// A value that is not in the map is ignored: decrementing it through
// operator[] would insert it and wrap the unsigned count around.
//
// valsDropped (in) - the parameters of the row leaving the frame.
// Always returns SUCCESS.
mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context,
                                         std::vector<ColumnDatum>& valsDropped)
{
    static_any::any& valIn = valsDropped[0].columnData;
    MEDIAN_DATA& data = static_cast<MedianData*>(context->getUserData())->mData;
    DATATYPE val = 0.0;

    if (valIn.empty())
    {
        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
    }

    if (valIn.compatible(charTypeId))
    {
        val = valIn.cast<char>();
    }
    else if (valIn.compatible(scharTypeId))
    {
        val = valIn.cast<signed char>();
    }
    else if (valIn.compatible(shortTypeId))
    {
        val = valIn.cast<short>();
    }
    else if (valIn.compatible(intTypeId))
    {
        val = valIn.cast<int>();
    }
    else if (valIn.compatible(longTypeId))
    {
        val = valIn.cast<long>();
    }
    else if (valIn.compatible(llTypeId))
    {
        val = valIn.cast<long long>();
    }
    else if (valIn.compatible(ucharTypeId))
    {
        val = valIn.cast<unsigned char>();
    }
    else if (valIn.compatible(ushortTypeId))
    {
        val = valIn.cast<unsigned short>();
    }
    else if (valIn.compatible(uintTypeId))
    {
        val = valIn.cast<unsigned int>();
    }
    else if (valIn.compatible(ulongTypeId))
    {
        val = valIn.cast<unsigned long>();
    }
    else if (valIn.compatible(ullTypeId))
    {
        val = valIn.cast<unsigned long long>();
    }
    else if (valIn.compatible(floatTypeId))
    {
        val = valIn.cast<float>();
    }
    else if (valIn.compatible(doubleTypeId))
    {
        val = valIn.cast<double>();
    }

    // For decimal types, we need to move the decimal point.
    uint32_t scale = valsDropped[0].scale;

    if (val != 0 && scale > 0)
    {
        val /= pow(10.0, (double)scale);
    }

    MEDIAN_DATA::iterator found = data.find(val);

    if (found != data.end())
    {
        if (found->second > 1)
        {
            --found->second;
        }
        else
        {
            // Last occurrence gone: drop the entry instead of keeping a zero count.
            data.erase(found);
        }
    }

    return mcsv1_UDAF::SUCCESS;
}
// Allocate the MedianData object that holds the value->count map for one
// aggregation and report sizeof(MedianData) as its length. Always returns
// SUCCESS.
mcsv1_UDAF::ReturnCode median::createUserData(UserData*& userData, int32_t& length)
{
    MedianData* created = new MedianData;
    userData = created;
    length = sizeof(MedianData);
    return mcsv1_UDAF::SUCCESS;
}
// Write the map to the stream: the entry count as an int32_t, followed by each
// (value, count) pair in map order.
void MedianData::serialize(messageqcpp::ByteStream& bs) const
{
    bs << (int32_t)mData.size();

    MEDIAN_DATA::const_iterator entry = mData.begin();

    while (entry != mData.end())
    {
        DATATYPE num = entry->first;
        uint32_t cnt = entry->second;
        bs << num;
        bs << cnt;
        ++entry;
    }
}
// Rebuild the map from the stream, replacing the current contents. Reads the
// layout written by serialize(): an int32_t entry count followed by that many
// (value, count) pairs.
void MedianData::unserialize(messageqcpp::ByteStream& bs)
{
    mData.clear();

    int32_t sz;
    bs >> sz;

    int remaining = 0;

    while (remaining < sz)
    {
        DATATYPE num;
        uint32_t cnt;
        bs >> num;
        bs >> cnt;
        mData[num] = cnt;
        ++remaining;
    }
}

294
utils/udfsdk/median.h Executable file
View File

@ -0,0 +1,294 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* median.h
***********************************************************************/
/**
* Columnstore interface for writing a User Defined Aggregate
* Functions (UDAF) and User Defined Analytic Functions (UDAnF)
* or a function that can act as either - UDA(n)F
*
* The basic steps are:
*
* 1. Create the UDA(n)F function interface in some .h file.
* 2. Create the UDF function implementation in some .cpp file
* 3. Create the connector stub (MariaDB UDAF definition) for
* this UDF function.
* 4. build the dynamic library using all of the source.
* 5. Put the library in $COLUMNSTORE_INSTALL/lib of
* all modules
* 6. restart the Columnstore system.
* 7. notify mysqld about the new function:
*
* CREATE AGGREGATE FUNCTION median returns REAL soname
* 'libudf_mysql.so';
*
* The UDAF functions may run distributed in the Columnstore
* engine. UDAnF do not run distributed.
*
* UDAF is User Defined Aggregate Function.
* UDAnF is User Defined Analytic Function.
* UDA(n)F is an acronym for a function that could be either. It
* is also used to describe the interface that is used for
* either.
*/
#ifndef HEADER_median
#define HEADER_median
#include <cstdlib>
#include <string>
#include <vector>
#include <boost/any.hpp>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{
// The value type median() works in, and the container used to aggregate:
// an ordered map from each distinct value to the number of times it was seen.
#define DATATYPE double
typedef std::map<DATATYPE, uint32_t> MEDIAN_DATA;

// Override UserData for data storage.
// Holds the value->count map for one aggregation and knows how to write it to
// and read it from a ByteStream.
struct MedianData : public UserData
{
    MedianData() {};
    virtual ~MedianData() {}

    // Stream the map out / back in.
    virtual void serialize(messageqcpp::ByteStream& bs) const;
    virtual void unserialize(messageqcpp::ByteStream& bs);

    MEDIAN_DATA mData; // distinct value -> occurrence count

private:
    // For now, copy construction is unwanted
    MedianData(UserData&);
    // The declaration above is not a copy constructor, so on its own it does not
    // stop the compiler from generating one. Declare (and never define) the real
    // copy operations to actually make the type non-copyable.
    MedianData(const MedianData&);
    MedianData& operator=(const MedianData&);
};
// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or
// User Defined Analytic Function (UDAnF).
// These will be singleton classes, so don't put any instance
// specific data in here. All instance data is stored in mcsv1Context
// passed to each user function and retrieved by the getUserData() method.
//
// Each API function returns a ReturnCode. If ERROR is returned at any time,
// the query is aborted, getInterrupted() will begin to return true and the
// message set in context->setErrorMessage() is returned to MariaDB.
// Return the median value of the dataset
class median : public mcsv1_UDAF
{
public:
// Defaults OK
median() : mcsv1_UDAF(){};
virtual ~median(){};
/**
* init()
*
* Mandatory. Implement this to initialize flags and instance
* data. Called once per SQL statement. You can do any sanity
* checks here.
*
* colTypes (in) - A vector of ColDataType defining the
* parameters of the UDA(n)F call. These can be used to decide
* to override the default return type. If desired, the new
* return type can be set by context->setResultType() and
* decimal scale and precision can be set by context->setScale
* and context->setPrecision respectively.
*
* Return mcsv1_UDAF::ERROR on any error, such as non-compatible
* colTypes or wrong number of arguments. Else return
* mcsv1_UDAF::SUCCESS.
*/
virtual ReturnCode init(mcsv1Context* context,
COL_TYPES& colTypes);
/**
* finish()
*
* Mandatory. Completes the UDA(n)F. Called once per SQL
* statement. Do not free the user data created by
* createUserData(). The SDK Framework owns that memory
* and will handle that. Often, there is nothing to do here.
*/
virtual ReturnCode finish(mcsv1Context* context);
/**
* reset()
*
* Mandatory. Reset the UDA(n)F for a new group, partition or,
* in some cases, new Window Frame. Do not free the user data
* created by createUserData(). The SDK Framework owns
* that memory and will handle that. Use this opportunity to
* reset any variables in context->getUserData() needed for the
* next aggregation. May be called multiple times if running in
* a distributed fashion.
*
* Use this opportunity to initialize the userData.
*/
virtual ReturnCode reset(mcsv1Context* context);
/**
* nextValue()
*
* Mandatory. Handle a single row.
*
* This function is called once for every row in the filtered
* result set (before aggregation). It is very important that
* this function is efficient.
*
* If the UDAF is running in a distributed fashion, nextValue
* cannot depend on order, as it will only be called for each
* row found on the specific PM.
*
* valsIn (in) - a vector of ColumnDatum describing the
* parameters from the row.
*/
virtual ReturnCode nextValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsIn);
/**
* subEvaluate()
*
* Mandatory -- Called if the UDAF is running in a distributed
* fashion. Columnstore tries to run all aggregate functions
* distributed, depending on context.
*
* Perform an aggregation on rows partially aggregated by
* nextValue. Columnstore calls nextValue for each row on a
* given PM for a group (GROUP BY). subEvaluate is called on the
* UM to consolidate those values into a single instance of
* userData. Keep your aggregated totals in context's userData.
* The first time this is called for a group, reset() would have
* been called with this version of userData.
*
* Called for every partial data set in each group in GROUP BY.
*
* When subEvaluate has been called for all subAggregated data
* sets, evaluate will be called with the same context as here.
*
* valIn (in) - This is a pointer to the UserData (a MedianData
* for this function) holding the partially aggregated values.
* It will contain the value of userData as seen in the last
* call to nextValue for a given PM.
*
*/
virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
/**
* evaluate()
*
* Mandatory. Get the aggregated value.
*
* Called for every new group if UDAF GROUP BY, UDAnF partition
* or, in some cases, new Window Frame.
*
* Set the aggregated value into valOut. The datatype is assumed
* to be the same as that set in the init() function;
*
* If the UDAF is running in a distributed fashion, evaluate is
* called after a series of subEvaluate calls.
*
* valOut (out) - Set the aggregated value here. The datatype is
* assumed to be the same as that set in the init() function;
*
* To return a NULL value, don't assign to valOut.
*/
virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
/**
* dropValue()
*
* Optional -- If defined, the server will call this instead of
* reset for UDAnF.
*
* Don't implement if a UDAnF has one or more of the following:
* The UDAnF can't be used with a Window Frame
* The UDAnF is not reversible in some way
* The UDAnF is not interested in optimal performance
*
* If not implemented, reset() followed by a series of
* nextValue() will be called for each movement of the Window
* Frame.
*
* If implemented, then each movement of the Window Frame will
* result in dropValue() being called for each row falling out
* of the Frame and nextValue() being called for each new row
* coming into the Frame.
*
* valsDropped (in) - a vector of the parameters from the row
* leaving the Frame
*
* dropValue() will not be called for unbounded/current row type
* frames, as those are already optimized.
*/
virtual ReturnCode dropValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsDropped);
/**
* createUserData()
*
* Optional -- If defined, the server will call this instead of
* createUserData on context.
*
* Create your variable length data structure via
* data = new <datatype>
*
* The data structure may contain references to containers or
* pointers to other objects. Remember that for distributed
* processing, this may be called multiple times for various
* computing blocks. At the least, it will be called once per PM
* that processes the data, and once more for the UM. For UDAnF,
* it may only be called once.
*
* Set length to the length of the data structure you create.
*
* NOTE(review): an earlier version of this comment promised a
* matching deleteUserData() call, but no such function is
* declared in this class -- confirm how the framework releases
* the object; its destructor is where cleanup would belong.
*
*/
virtual ReturnCode createUserData(UserData*& data, int32_t& length);
protected:
};
}; // namespace
#undef EXPORT
#endif // HEADER_median.h

250
utils/udfsdk/ssq.cpp Executable file
View File

@ -0,0 +1,250 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <cmath>
#include <sstream>
#include <cstring>
#include <typeinfo>
#include "ssq.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
#define DATATYPE double

// Per-group working storage for the ssq() aggregate.
// init() reserves sizeof(ssq_data) bytes of user data for it and reset()
// zeroes both members before each aggregation.
struct ssq_data
{
    uint64_t scale;   // zeroed by reset(); not otherwise read in this file
    DATATYPE sumsq;   // running sum of the squared input values

    // Initialise every member so a directly constructed instance is in the
    // same state reset() produces (sumsq used to be left indeterminate).
    ssq_data() : scale(0), sumsq(0) {}
};
#define OUT_TYPE int64_t
// Validate the call signature of ssq() and describe its result column.
//
// context  - receives the error message on failure, or the user data size,
//            result type, width, scale, precision and run flags on success.
// colTypes - the argument types of the call; exactly one numeric argument
//            is accepted.
//
// Returns mcsv1_UDAF::ERROR for a wrong argument count or a non-numeric
// argument, otherwise mcsv1_UDAF::SUCCESS.
mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context,
                                 COL_TYPES& colTypes)
{
    // Whatever message is chosen here will be prepended with
    // "The storage engine for the table doesn't support "
    const char* complaint = NULL;

    if (colTypes.size() == 0)
    {
        complaint = "ssq() with 0 arguments";
    }
    else if (colTypes.size() > 1)
    {
        complaint = "ssq() with more than 1 argument";
    }
    else if (!isNumeric(colTypes[0].second))
    {
        complaint = "ssq() with non-numeric argument";
    }

    if (complaint)
    {
        context->setErrorMessage(complaint);
        return mcsv1_UDAF::ERROR;
    }

    // Working storage is one ssq_data; the result is an 8 byte DOUBLE.
    context->setUserDataSize(sizeof(ssq_data));
    context->setResultType(CalpontSystemCatalog::DOUBLE);
    context->setColWidth(8);
    // Squaring doubles the number of decimal places.
    context->setScale(context->getScale()*2);
    context->setPrecision(19);
    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
    return mcsv1_UDAF::SUCCESS;
}
// End-of-statement hook. ssq() holds nothing that needs releasing here,
// so this only reports success.
mcsv1_UDAF::ReturnCode ssq::finish(mcsv1Context* context)
{
    return mcsv1_UDAF::SUCCESS;
}
// Clear the running totals so the user data can be reused for the next
// aggregation. Always returns SUCCESS; a missing data block is tolerated.
mcsv1_UDAF::ReturnCode ssq::reset(mcsv1Context* context)
{
    ssq_data* state = (ssq_data*)context->getUserData()->data;

    if (!state)
    {
        return mcsv1_UDAF::SUCCESS;
    }

    state->scale = 0;
    state->sumsq = 0;
    return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsIn)
{
static_any::any& valIn = valsIn[0].columnData;
struct ssq_data* data = (struct ssq_data*)context->getUserData()->data;
DATATYPE val = 0.0;
if (valIn.empty())
{
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
}
if (valIn.compatible(charTypeId))
{
val = valIn.cast<char>();
}
else if (valIn.compatible(scharTypeId))
{
val = valIn.cast<signed char>();
}
else if (valIn.compatible(shortTypeId))
{
val = valIn.cast<short>();
}
else if (valIn.compatible(intTypeId))
{
val = valIn.cast<int>();
}
else if (valIn.compatible(longTypeId))
{
val = valIn.cast<long>();
}
else if (valIn.compatible(llTypeId))
{
val = valIn.cast<long long>();
}
else if (valIn.compatible(ucharTypeId))
{
val = valIn.cast<unsigned char>();
}
else if (valIn.compatible(ushortTypeId))
{
val = valIn.cast<unsigned short>();
}
else if (valIn.compatible(uintTypeId))
{
val = valIn.cast<unsigned int>();
}
else if (valIn.compatible(ulongTypeId))
{
val = valIn.cast<unsigned long>();
}
else if (valIn.compatible(ullTypeId))
{
val = valIn.cast<unsigned long long>();
}
else if (valIn.compatible(floatTypeId))
{
val = valIn.cast<float>();
}
else if (valIn.compatible(doubleTypeId))
{
val = valIn.cast<double>();
}
// For decimal types, we need to move the decimal point.
uint32_t scale = valsIn[0].scale;
if (val != 0 && scale > 0)
{
val /= pow(10.0, (double)scale);
}
data->sumsq += val*val;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode ssq::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
struct ssq_data* outData = (struct ssq_data*)context->getUserData()->data;
struct ssq_data* inData = (struct ssq_data*)userDataIn->data;
outData->sumsq += inData->sumsq;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode ssq::evaluate(mcsv1Context* context, static_any::any& valOut)
{
struct ssq_data* data = (struct ssq_data*)context->getUserData()->data;
valOut = data->sumsq;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsDropped)
{
static_any::any& valIn = valsDropped[0].columnData;
struct ssq_data* data = (struct ssq_data*)context->getUserData()->data;
DATATYPE val = 0.0;
if (valIn.empty())
{
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
}
if (valIn.compatible(charTypeId))
{
val = valIn.cast<char>();
}
else if (valIn.compatible(scharTypeId))
{
val = valIn.cast<signed char>();
}
else if (valIn.compatible(shortTypeId))
{
val = valIn.cast<short>();
}
else if (valIn.compatible(intTypeId))
{
val = valIn.cast<int>();
}
else if (valIn.compatible(longTypeId))
{
val = valIn.cast<long>();
}
else if (valIn.compatible(llTypeId))
{
val = valIn.cast<long long>();
}
else if (valIn.compatible(ucharTypeId))
{
val = valIn.cast<unsigned char>();
}
else if (valIn.compatible(ushortTypeId))
{
val = valIn.cast<unsigned short>();
}
else if (valIn.compatible(uintTypeId))
{
val = valIn.cast<unsigned int>();
}
else if (valIn.compatible(ulongTypeId))
{
val = valIn.cast<unsigned long>();
}
else if (valIn.compatible(ullTypeId))
{
val = valIn.cast<unsigned long long>();
}
else if (valIn.compatible(floatTypeId))
{
val = valIn.cast<float>();
}
else if (valIn.compatible(doubleTypeId))
{
val = valIn.cast<double>();
}
// For decimal types, we need to move the decimal point.
uint32_t scale = valsDropped[0].scale;
if (val != 0 && scale > 0)
{
val /= pow(10.0, (double)scale);
}
data->sumsq -= val*val;
return mcsv1_UDAF::SUCCESS;
}

248
utils/udfsdk/ssq.h Executable file
View File

@ -0,0 +1,248 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* ssq.h
***********************************************************************/
/**
* Columnstore interface for writing a User Defined Aggregate
* Functions (UDAF) and User Defined Analytic Functions (UDAnF)
* or a function that can act as either - UDA(n)F
*
* The basic steps are:
*
* 1. Create the UDA(n)F function interface in some .h file.
* 2. Create the UDA(n)F function implementation in some .cpp file.
* 3. Create the connector stub (MariaDB UDAF definition) for
* this UDA(n)F function.
* 4. Build the dynamic library using all of the source.
* 5. Put the library in $COLUMNSTORE_INSTALL/lib of
* all modules.
* 6. restart the Columnstore system.
* 7. notify mysqld about the new function:
*
* CREATE AGGREGATE FUNCTION ssq returns REAL soname
* 'libudf_mysql.so';
*
* The UDAF function will run distributed in the Columnstore
* engine. UDAnF do not run distributed.
*
* UDAF is User Defined Aggregate Function.
* UDAnF is User Defined Analytic Function.
* UDA(n)F is an acronym for a function that could be either. It
* is also used to describe the interface that is used for
* either.
*/
#ifndef HEADER_ssq
#define HEADER_ssq
#include <cstdlib>
#include <string>
#include <vector>
#include <boost/any.hpp>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{
// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or
// User Defined Analytic Function (UDAnF).
// These will be singleton classes, so don't put any instance
// specific data in here. All instance data is stored in mcsv1Context
// passed to each user function and retrieved by the getUserData() method.
//
// Each API function returns a ReturnCode. If ERROR is returned at any time,
// the query is aborted, getInterrupted() will begin to return true and the
// message set in context->setErrorMessage() is returned to MariaDB.
// A simple aggregate to return the sum of squares
class ssq : public mcsv1_UDAF
{
// ssq() - sum of squares of a single numeric argument. Usable as an
// aggregate (UDAF) and, because dropValue() is provided, as a
// windowed analytic function (UDAnF).
public:
// Defaults OK
ssq() : mcsv1_UDAF(){};
virtual ~ssq(){};
/**
* init()
*
* Mandatory. Implement this to initialize flags and instance
* data. Called once per SQL statement. You can do any sanity
* checks here.
*
* colTypes (in) - A vector of ColDataType defining the
* parameters of the UDA(n)F call. These can be used to decide
* to override the default return type. If desired, the new
* return type can be set by context->setResultType() and
* decimal scale and precision can be set by context->setScale
* and context->setPrecision respectively.
*
* Return mcsv1_UDAF::ERROR on any error, such as non-compatible
* colTypes or wrong number of arguments. Else return
* mcsv1_UDAF::SUCCESS.
*/
virtual ReturnCode init(mcsv1Context* context,
COL_TYPES& colTypes);
/**
* finish()
*
* Mandatory. Completes the UDA(n)F. Called once per SQL
* statement. Do not free any memory allocated by
* context->createUserData(). The SDK Framework owns that memory
* and will handle that. Often, there is nothing to do here.
*/
virtual ReturnCode finish(mcsv1Context* context);
/**
* reset()
*
* Mandatory. Reset the UDA(n)F for a new group, partition or,
* in some cases, new Window Frame. Do not free any memory
* allocated by context->createUserData(). The SDK Framework
* owns that memory and will handle that. Use this opportunity
* to reset any variables in context->getUserData() needed for
* the next aggregation. May be called multiple times on
* different modules.
*/
virtual ReturnCode reset(mcsv1Context* context);
/**
* nextValue()
*
* Mandatory. Handle a single row.
*
* This function is called once for every row in the filtered
* result set (before aggregation). It is very important that
* this function is efficient.
*
* If the UDAF is running in a distributed fashion, nextValue
* cannot depend on order, as it will only be called for each
* row found on the specific PM.
*
* valsIn (in) - a vector of data structures describing the
* parameters from the row.
*/
virtual ReturnCode nextValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsIn);
/**
* subEvaluate()
*
* Mandatory -- Called if the UDAF is running in a distributed
* fashion. Columnstore tries to run all aggregate functions
* distributed, depending on context.
*
* Perform an aggregation on rows partially aggregated by
* nextValue. Columnstore calls nextValue for each row on a
* given PM for a group (GROUP BY). subEvaluate is called on the
* UM to consolidate those values into a single instance of
* userData. Keep your aggregated totals in context's userData.
* The first time this is called for a group, reset() would have
* been called with this version of userData.
*
* Called for every partial data set in each group in GROUP BY.
*
* When subEvaluate has been called for all subAggregated data
* sets, evaluate will be called.
*
* userDataIn (in) - This is a pointer to a memory block of the
* size set in setUserDataSize. It will contain the value of
* userData as seen in the last call to nextValue for a given PM.
*
*/
virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn);
/**
* evaluate()
*
* Mandatory. Get the aggregated value.
*
* Called for every new group if UDAF GROUP BY, UDAnF partition
* or, in some cases, new Window Frame.
*
* Set the aggregated value into valOut. The datatype is assumed
* to be the same as that set in the init() function.
*
* If the UDAF is running in a distributed fashion, evaluate is
* called after a series of subEvaluate calls.
*
* valOut (out) - Set the aggregated value here. The datatype is
* assumed to be the same as that set in the init() function.
*
* To return a NULL value, don't assign to valOut.
*/
virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
/**
* dropValue()
*
* Optional -- If defined, the server will call this instead of
* reset for UDAnF.
*
* Don't implement if a UDAnF has one or more of the following:
* The UDAnF can't be used with a Window Frame
* The UDAnF is not reversible in some way
* The UDAnF is not interested in optimal performance
*
* If not implemented, reset() followed by a series of
* nextValue() will be called for each movement of the Window
* Frame.
*
* If implemented, then each movement of the Window Frame will
* result in dropValue() being called for each row falling out
* of the Frame and nextValue() being called for each new row
* coming into the Frame.
*
* valsDropped (in) - a vector of the parameters from the row
* leaving the Frame
*
* dropValue() will not be called for unbounded/current row type
* frames, as those are already optimized.
*/
virtual ReturnCode dropValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsDropped);
protected:
};
}; // namespace
#undef EXPORT
#endif // HEADER_ssq.h

228
utils/udfsdk/udfmysql.cpp Normal file → Executable file
View File

@ -168,13 +168,239 @@ void mcs_isnull_deinit(UDF_INIT* initid)
}
/**
 * Connector-side body of mcs_isnull. This implementation ignores all of
 * its arguments and always returns 0.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
long long mcs_isnull(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error)
{
    return 0;
}
/**
* ALLNULL connector stub
*/
// Counters kept by the allnull connector stub between calls.
struct allnull_data
{
    ulonglong totalQuantity;   // rows seen since the last clear
    ulonglong totalNulls;      // how many of those rows had a NULL argument
};
/**
 * Allocate and zero the counters used by the allnull connector stub.
 *
 * initid  - receives the allocated allnull_data in initid->ptr.
 * args    - the call's arguments; at least one is required.
 * message - receives an error text when 1 is returned.
 *
 * Returns 0 on success, 1 on error.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool allnull_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
    struct allnull_data* data;

    // allnull_add() reads args->args[0] unconditionally, so a call with
    // no argument must be rejected here rather than read out of bounds.
    if (args->arg_count < 1)
    {
        strcpy(message,"allnull() requires at least one argument");
        return 1;
    }

    if (!(data = (struct allnull_data*) malloc(sizeof(struct allnull_data))))
    {
        strmov(message,"Couldn't allocate memory");
        return 1;
    }

    data->totalQuantity = 0;
    data->totalNulls = 0;
    initid->ptr = (char*)data;
    return 0;
}
// Release the counter block stored in initid->ptr by allnull_init().
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void allnull_deinit(UDF_INIT* initid)
{
    char* state = initid->ptr;
    free(state);
}
// Result of the aggregate: 1 when at least one row was counted and every
// counted row was NULL, otherwise 0.
#ifdef _MSC_VER
__declspec(dllexport)
#endif
long long allnull(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
                  char* is_null, char* error __attribute__((unused)))
{
    const struct allnull_data* counters = (const struct allnull_data*)initid->ptr;

    if (counters->totalQuantity == 0)
    {
        return 0;
    }

    return (counters->totalNulls == counters->totalQuantity) ? 1 : 0;
}
// Zero both counters so a new group starts from scratch.
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
allnull_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
              char* message __attribute__((unused)))
{
    struct allnull_data* counters = (struct allnull_data*)initid->ptr;
    counters->totalNulls = 0;
    counters->totalQuantity = 0;
}
// Count one row, and count it as NULL when its first argument pointer
// is null.
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
allnull_add(UDF_INIT* initid, UDF_ARGS* args,
            char* is_null,
            char* message __attribute__((unused)))
{
    struct allnull_data* counters = (struct allnull_data*)initid->ptr;
    const bool argIsNull = !args->args[0];

    ++counters->totalQuantity;

    if (argIsNull)
    {
        ++counters->totalNulls;
    }
}
/**
* SSQ connector stub
*/
// State kept by the ssq connector stub between calls.
struct ssq_data
{
    double sumsq;   // sum-of-squares accumulator
};
// Validate the argument count and allocate the zeroed accumulator for the
// ssq connector stub. Returns 0 on success; on failure writes a text into
// message and returns 1.
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool ssq_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
    if (args->arg_count != 1)
    {
        strcpy(message,"ssq() requires one argument");
        return 1;
    }

    struct ssq_data* state = (struct ssq_data*) malloc(sizeof(struct ssq_data));

    if (!state)
    {
        strmov(message,"Couldn't allocate memory");
        return 1;
    }

    state->sumsq = 0;
    initid->ptr = (char*)state;
    return 0;
}
// Release the accumulator stored in initid->ptr by ssq_init().
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void ssq_deinit(UDF_INIT* initid)
{
    char* state = initid->ptr;
    free(state);
}
// Start a new group: set the accumulator back to zero.
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
ssq_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
          char* message __attribute__((unused)))
{
    struct ssq_data* state = (struct ssq_data*)initid->ptr;
    state->sumsq = 0;
}
/**
 * Add the square of the row's argument to the running total.
 *
 * initid - carries the ssq_data accumulator in initid->ptr.
 * args   - args->args[0] / args->arg_type[0] describe the row's value.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
ssq_add(UDF_INIT* initid, UDF_ARGS* args,
        char* is_null,
        char* message __attribute__((unused)))
{
    struct ssq_data* data = (struct ssq_data*)initid->ptr;

    // A NULL argument arrives as a null pointer (allnull_add() relies on
    // the same convention); it contributes nothing to the sum.
    if (!args->args[0])
    {
        return;
    }

    double val = cvtArgToDouble(args->arg_type[0], args->args[0]);
    // Accumulate: assigning here would keep only the last row's square.
    data->sumsq += val*val;
}
// Hand back the accumulated sum of squares, converted from double to
// long long.
// NOTE(review): the usage comment in ssq.h registers this function with
// "returns REAL" while the return type here is long long -- confirm which
// one is intended.
#ifdef _MSC_VER
__declspec(dllexport)
#endif
long long ssq(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
              char* is_null, char* error __attribute__((unused)))
{
    const struct ssq_data* state = (const struct ssq_data*)initid->ptr;
    const double total = state->sumsq;
    return (long long)total;
}
//=======================================================================
/**
* MEDIAN connector stub
*/
// Accept a median() call only when it has exactly one argument. This stub
// allocates no per-query state and leaves initid untouched.
// Returns 0 on success; otherwise writes a text into message and returns 1.
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool median_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
    if (args->arg_count == 1)
    {
        return 0;
    }

    strcpy(message,"median() requires one argument");
    return 1;
}
// Nothing to release: median_init() allocates no state.
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void median_deinit(UDF_INIT* initid)
{
}
// Placeholder: the median stub keeps no state, so there is nothing to clear.
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
median_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
             char* message __attribute__((unused)))
{
}
// Placeholder: rows are not accumulated by the median stub.
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
median_add(UDF_INIT* initid, UDF_ARGS* args,
           char* is_null,
           char* message __attribute__((unused)))
{
}
// Placeholder result: the median stub computes nothing and always
// returns 0.
#ifdef _MSC_VER
__declspec(dllexport)
#endif
long long median(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
                 char* is_null, char* error __attribute__((unused)))
{
    const long long placeholder = 0;
    return placeholder;
}
}
// vim:ts=4 sw=4:

9
utils/udfsdk/udfsdk.vpj Normal file → Executable file
View File

@ -202,12 +202,20 @@
<Folder
Name="Source Files"
Filters="*.c;*.C;*.cc;*.cpp;*.cp;*.cxx;*.c++;*.prg;*.pas;*.dpr;*.asm;*.s;*.bas;*.java;*.cs;*.sc;*.e;*.cob;*.html;*.rc;*.tcl;*.py;*.pl;*.d">
<F N="allnull.cpp"/>
<F N="mcsv1_udaf.cpp"/>
<F N="median.cpp"/>
<F N="ssq.cpp"/>
<F N="udfinfinidb.cpp"/>
<F N="udfmysql.cpp"/>
</Folder>
<Folder
Name="Header Files"
Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if">
<F N="allnull.h"/>
<F N="mcsv1_udaf.h"/>
<F N="median.h"/>
<F N="ssq.h"/>
<F N="udfsdk.h"/>
</Folder>
<Folder
@ -222,6 +230,7 @@
<F
N="Makefile"
Type="Makefile"/>
<F N="mcsv1_UDAF_base"/>
</Folder>
</Files>
</Project>