You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-01 06:46:55 +03:00
MCOL-523 Add UDAF and UDAnF SDK
This commit is contained in:
990
utils/udfsdk/mcsv1_udaf.h
Executable file
990
utils/udfsdk/mcsv1_udaf.h
Executable file
@ -0,0 +1,990 @@
|
||||
/* Copyright (C) 2017 MariaDB Corporaton
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; version 2 of
|
||||
the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
/***********************************************************************
|
||||
* $Id$
|
||||
*
|
||||
* mcsv1_UDAF.h
|
||||
***********************************************************************/
|
||||
|
||||
/**
|
||||
* Columnstore interface for writing a User Defined Aggregate
|
||||
* Functions (UDAF) and User Defined Analytic Functions (UDAnF)
|
||||
* or a function that can act as either - UDA(n)F
|
||||
*
|
||||
* The basic steps are:
|
||||
*
|
||||
* 1. Create a the UDA(n)F function interface in some .h file.
|
||||
* 2. Create the UDF function implementation in some .cpp file
|
||||
* 3. Create the connector stub (MariaDB UDAF definition) for
|
||||
* this UDF function.
|
||||
* 4. build the dynamic library using all of the source.
|
||||
* 5 Put the library in $COLUMNSTORE_INSTALL/lib of
|
||||
* all modules
|
||||
* 6. restart the Columnstore system.
|
||||
* 7. notify mysqld about the new functions with commands like:
|
||||
*
|
||||
* // An example of xor over a range for UDAF and UDAnF
|
||||
* CREATE AGGREGATE FUNCTION mcs_bit_xor returns BOOL soname
|
||||
* 'libudfsdk.so';
|
||||
*
|
||||
* // An example that only makes sense as a UDAnF
|
||||
* CREATE AGGREGATE FUNCTION mcs_interpolate returns REAL
|
||||
* soname 'libudfsdk.so';
|
||||
*
|
||||
* The UDAF functions may run distributed in the Columnstore
|
||||
* engine. UDAnF do not run distributed.
|
||||
*
|
||||
* UDAF is User Defined Aggregate Function.
|
||||
* UDAnF is User Defined Analytic Function.
|
||||
* UDA(n)F is an acronym for a function that could be either. It
|
||||
* is also used to describe the interface that is used for
|
||||
* either.
|
||||
*/
|
||||
#ifndef HEADER_mcsv1_udaf
|
||||
#define HEADER_mcsv1_udaf
|
||||
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <boost/any.hpp>
|
||||
#ifdef _MSC_VER
|
||||
#include <unordered_map>
|
||||
#else
|
||||
#include <tr1/unordered_map>
|
||||
#endif
|
||||
#include "any.hpp"
|
||||
#include "calpontsystemcatalog.h"
|
||||
#include "wf_frame.h"
|
||||
|
||||
using namespace execplan;
|
||||
|
||||
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
|
||||
#define EXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define EXPORT
|
||||
#endif
|
||||
|
||||
namespace mcsv1sdk
|
||||
{
|
||||
/**
|
||||
* A map from name to function object.
|
||||
*
|
||||
* This is temporary until we get the library loading task
|
||||
* complete
|
||||
*
|
||||
* TODO: Remove when library loading is enabled.
|
||||
*/
|
||||
class mcsv1_UDAF;
|
||||
typedef std::tr1::unordered_map<std::string, mcsv1_UDAF*> UDAF_MAP;
|
||||
|
||||
class UDAFMap
|
||||
{
|
||||
public:
|
||||
EXPORT UDAFMap(){};
|
||||
|
||||
EXPORT ~UDAFMap(){};
|
||||
|
||||
static EXPORT UDAF_MAP& getMap();
|
||||
private:
|
||||
static UDAF_MAP fm;
|
||||
};
|
||||
|
||||
/**
|
||||
* A class to hold your user data
|
||||
*
|
||||
* If your UDAF only needs a fixed sized data struct, you need
|
||||
* do nothing with this. Call setUserDataSize in your init
|
||||
* function with the required size and the framework will take
|
||||
* care of it.
|
||||
*
|
||||
* If you need something more or just want to control things,
|
||||
* then override UserData with your data structure and
|
||||
* implement createUserData in your function object to create
|
||||
* your data structure. Your UserData destuctor should take care
|
||||
* of any cleanup you may need (Simple containers clean
|
||||
* themselves up).
|
||||
*/
|
||||
class mcsv1Context;
|
||||
|
||||
struct UserData
|
||||
{
|
||||
UserData() : size(0), data(NULL) {};
|
||||
UserData(size_t sz) {size = sz; data = new uint8_t[sz];}
|
||||
|
||||
virtual ~UserData() { if (data) delete [] data;}
|
||||
|
||||
/**
|
||||
* serialize()
|
||||
*
|
||||
* User data is passed between process. In order to do so, it
|
||||
* must be serialized. Since user data can have sub objects,
|
||||
* containers and the like, it is up to the UDAF to provide the
|
||||
* serialize function. The streaming functionality of
|
||||
* messageqcpp::ByteStream must be used.
|
||||
*
|
||||
* The default streams the size and data buffer to the
|
||||
* ByteStream
|
||||
*/
|
||||
virtual void serialize(messageqcpp::ByteStream& bs) const;
|
||||
|
||||
/**
|
||||
* unserialize()
|
||||
*
|
||||
* User data is passed between process. In order to do so, it
|
||||
* must be unserialized. Since user data can have sub objects,
|
||||
* containers and the like, it is up to the UDAF to provide the
|
||||
* unserialize function. The streaming functionality of
|
||||
* messageqcpp::ByteStream must be used.
|
||||
*
|
||||
* data is the datablock returned by createUserData.
|
||||
*
|
||||
* The default creates the data array and streams into data.
|
||||
*/
|
||||
virtual void unserialize(messageqcpp::ByteStream& bs);
|
||||
|
||||
// The default data store. You may or may not wish to use these fields.
|
||||
uint32_t size;
|
||||
uint8_t* data;
|
||||
private:
|
||||
// For now, copy construction is unwanted
|
||||
UserData(UserData&);
|
||||
};
|
||||
|
||||
// Flags to define the type and limitations of a UDA(n)F
|
||||
// Used in context->fRunFlags
|
||||
static uint64_t UDAF_OVER_REQUIRED __attribute__ ((unused)) = 1; // May only be used as UDAnF
|
||||
static uint64_t UDAF_OVER_ALLOWED __attribute__ ((unused)) = 1 << 1; // May be used as UDAF or UDAnF
|
||||
static uint64_t UDAF_ORDER_REQUIRED __attribute__ ((unused)) = 1 << 2; // If used as UDAnF, ORDER BY is required
|
||||
static uint64_t UDAF_ORDER_ALLOWED __attribute__ ((unused)) = 1 << 3; // If used as UDAnF, ORDER BY is optional
|
||||
static uint64_t UDAF_WINDOWFRAME_REQUIRED __attribute__ ((unused)) = 1 << 4; // If used as UDAnF, a WINDOW FRAME is required
|
||||
static uint64_t UDAF_WINDOWFRAME_ALLOWED __attribute__ ((unused)) = 1 << 5; // If used as UDAnF, a WINDOW FRAME is optional
|
||||
static uint64_t UDAF_MAYBE_NULL __attribute__ ((unused)) = 1 << 6; // If UDA(n)F might return NULL.
|
||||
static uint64_t UDAF_IGNORE_NULLS __attribute__ ((unused)) = 1 << 7; // If UDA(n)F wants NULL rows suppressed.
|
||||
|
||||
// Flags set by the framework to define the context of the call.
|
||||
// User code shouldn't use these directly
|
||||
// used in context->fContextFlags
|
||||
static uint64_t CONTEXT_IS_ANALYTIC __attribute__ ((unused)) = 1; // If called using OVER
|
||||
static uint64_t CONTEXT_HAS_CURRENT_ROW __attribute__ ((unused)) = 1 << 1; // The current window contains the current row.
|
||||
static uint64_t CONTEXT_IS_PM __attribute__ ((unused)) = 1 << 2; // The call was made by the PM
|
||||
|
||||
// Flags that describe the contents of a specific input parameter
|
||||
// These will be set in context->dataFlags for each method call by the framework.
|
||||
// User code shouldn't use these directly
|
||||
static uint64_t PARAM_IS_NULL __attribute__ ((unused)) = 1;
|
||||
static uint64_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1;
|
||||
|
||||
// shorthand for the list of columns in the call sent to init()
|
||||
// first is the actual column name and second is the data type in Columnstore.
|
||||
typedef std::vector<std::pair<std::string, CalpontSystemCatalog::ColDataType> >COL_TYPES;
|
||||
|
||||
// This is the context class that is passed to all API callbacks
|
||||
// The framework potentially sets data here for each invocation of
|
||||
// mcsv1_UDAF methods. Access methods are given for data useful to UDA(n)F.
|
||||
// Don't modify anything directly except data retrieved with getUserData().
|
||||
|
||||
// UDA(n)F devlopers should not modify this class. The framework and other UDA(n)F
|
||||
// rely on it being as it was when they were compiled.
|
||||
//
|
||||
// It's probable that future versions of Columnstore will add functionality to
|
||||
// the context. UDA(n)F may need to be re-compiled in this case.
|
||||
class mcsv1Context
|
||||
{
|
||||
public:
|
||||
EXPORT mcsv1Context();
|
||||
EXPORT mcsv1Context(const mcsv1Context& rhs);
|
||||
// The destructor is virtual only in case a version 2 is made derived from v1
|
||||
// to promote backward compatibility.
|
||||
// mcsv1Context should never be subclassed by UDA(n)F developers
|
||||
EXPORT virtual ~mcsv1Context();
|
||||
|
||||
// Set an error message if something goes wrong
|
||||
EXPORT void setErrorMessage(std::string errmsg);
|
||||
|
||||
// Get the previously set error message
|
||||
EXPORT const std::string& getErrorMessage() const;
|
||||
|
||||
// Set the flags as a set. Return the previous flags.
|
||||
EXPORT uint64_t setRunFlags(uint64_t flags);
|
||||
// return the flags
|
||||
EXPORT uint64_t getRunFlags() const;
|
||||
|
||||
// The following set, get, clear and toggle methods can be used to manipulate
|
||||
// multiple flags by ORing them together in the call sequence.
|
||||
// Ex setRunFlag(UDAF_OVER_REQUIRED | UDAF_ORDER_REQUIRED);
|
||||
// sets both flags and returns true if BOTH flags are already set.
|
||||
//
|
||||
// Set a specific flag and return its previous setting
|
||||
EXPORT bool setRunFlag(uint64_t flag);
|
||||
// Get a specific flag
|
||||
EXPORT bool getRunFlag(uint64_t flag);
|
||||
// clear a specific flag and return its previous setting
|
||||
EXPORT bool clearRunFlag(uint64_t flag);
|
||||
// toggle a specific flag and return its previous setting
|
||||
EXPORT bool toggleRunFlag(uint64_t flag);
|
||||
|
||||
// Use these to determine the way your UDA(n)F was called
|
||||
// Valid in all method calls
|
||||
EXPORT bool isAnalytic();
|
||||
EXPORT bool isWindowHasCurrentRow();
|
||||
|
||||
// Determine if the call is made by the UM
|
||||
// This could be because the UDA(n)F is not being distributed
|
||||
// Or it could be during setup or during consolodation of PM values.
|
||||
// valid in all calls
|
||||
EXPORT bool isUM();
|
||||
|
||||
// Determine if the call is made by the PM
|
||||
// This will be during partial aggregation performed on the PM
|
||||
// valid in all calls
|
||||
EXPORT bool isPM();
|
||||
|
||||
// Parameter refinement description accessors
|
||||
// valid in nextValue, dropValue and evaluateCumulative
|
||||
size_t getParameterCount() const;
|
||||
|
||||
// Determine if an input parameter is NULL
|
||||
// valid in nextValue, dropValue and evaluateCumulative
|
||||
EXPORT bool isParamNull(int paramIdx);
|
||||
|
||||
// If a parameter is a constant, the UDA(n)F could presumably optimize its workings.
|
||||
// During the first call to nextValue() or evaluateCumulative().
|
||||
// Is there a better way to determine this?
|
||||
// valid in nextValue, dropValue and evaluateCumulative
|
||||
EXPORT bool isParamConstant(int paramIdx);
|
||||
|
||||
// For getting the result type.
|
||||
EXPORT CalpontSystemCatalog::ColDataType getResultType() const;
|
||||
|
||||
// For getting the decimal characteristics for the return value.
|
||||
// These will be set to the default before init().
|
||||
EXPORT int32_t getScale() const;
|
||||
EXPORT int32_t getPrecision() const;
|
||||
|
||||
// If you want to change the result type
|
||||
// valid in init()
|
||||
EXPORT bool setResultType(CalpontSystemCatalog::ColDataType resultType);
|
||||
|
||||
// For setting the decimal characteristics for the return value.
|
||||
// This only makes sense if the return type is decimal, but should be set
|
||||
// to (0, -1) for other types if the inout is decimal.
|
||||
// valid in init()
|
||||
EXPORT bool setScale(int32_t scale);
|
||||
EXPORT bool setPrecision(int32_t precision);
|
||||
|
||||
// For all types, get the return column width in bytes. Ex. INT will return 4.
|
||||
EXPORT int32_t getColWidth();
|
||||
|
||||
// For non-numric return types, set the return column width. This defaults
|
||||
// to the the length of the input.
|
||||
// valid in init()
|
||||
EXPORT bool setColWidth(int32_t colWidth);
|
||||
|
||||
// If a method is known to take a while, call this periodically to see if something
|
||||
// interupted the processing. If getInterrupted() returns true, then the executing
|
||||
// method should clean up and exit.
|
||||
EXPORT bool getInterrupted() const;
|
||||
|
||||
// Returns the actual number of rows in the partition. If no partitioning, returns 0.
|
||||
// valid in reset()
|
||||
EXPORT uint64_t getRowsInPartition() const;
|
||||
|
||||
// Returns the number of rows in the aggregate. This could be the total number of rows,
|
||||
// the number of rows in the group, or the number of rows in the PM's subaggregate,
|
||||
// depending on the context it was called.
|
||||
// valid in subEvaluate() end evaluate().
|
||||
EXPORT uint64_t getRowCnt() const;
|
||||
|
||||
// Allocate instance specific memory. This should be type cast to a structure overlay
|
||||
// defined by the function. The actual allocatoin occurs in the various modules that
|
||||
// do the aggregation. If the UDAF is being calculated in a distributed fashion, then
|
||||
// multiple instances of this data may be allocated. Calls to the subaggregate functions
|
||||
// do not share a context.
|
||||
// You do not need to worry about freeing this memory. The framework handles all management.
|
||||
// Call this during init()
|
||||
EXPORT void setUserDataSize(int bytes);
|
||||
|
||||
// Call this everywhere except init()
|
||||
EXPORT UserData* getUserData();
|
||||
|
||||
// Many UDAnF need a default Window Frame. If none is set here, the default is
|
||||
// UNBOUNDED PRECEDING to CURRENT ROW.
|
||||
// It's possible to not allow the the WINDOW FRAME phrase in the UDAnF by setting
|
||||
// the UDAF_WINDOWFRAME_REQUIRED and UDAF_WINDOWFRAME_ALLOWED both to false. Columnstore
|
||||
// requires a Window Frame in order to process UDAnF. In this case, the default will
|
||||
// be used for all calls.
|
||||
// Possible values for start frame are
|
||||
// WF_UNBOUNDED_PRECEDING, WF_CURRENT_ROW, WF_PRECEDING or WF_FOLLOWING
|
||||
// possible values for end frame are
|
||||
// WF_CURRENT_ROW, WF_UNBOUNDED_FOLLOWING, WF_PRECEDING or WF_FOLLOWING
|
||||
// If WF_PRECEEdING and/or WF_FOLLOWING, a start or end constant should
|
||||
// be included to say how many preceeding or following is the default
|
||||
// Set this during init()
|
||||
EXPORT bool setDefaultWindowFrame(WF_FRAME defaultStartFrame,
|
||||
WF_FRAME defaultEndFrame,
|
||||
int32_t startConstant = 0, // For WF_PRECEEDING or WF_FOLLOWING
|
||||
int32_t endConstant = 0); // For WF_PRECEEDING or WF_FOLLOWING
|
||||
|
||||
// There may be times you want to know the actual frame set by the caller
|
||||
EXPORT void getStartFrame(WF_FRAME& startFrame, int32_t& startConstant) const;
|
||||
EXPORT void getEndFrame(WF_FRAME& endFrame, int32_t& endConstant) const;
|
||||
|
||||
// Deep Equivalence
|
||||
bool operator==(const mcsv1Context& c) const;
|
||||
bool operator!=(const mcsv1Context& c) const;
|
||||
|
||||
// stream operator for debugging
|
||||
EXPORT const std::string toString() const;
|
||||
|
||||
// Get the name of the function
|
||||
EXPORT const std::string& getName() const;
|
||||
|
||||
EXPORT mcsv1Context& operator=(const mcsv1Context& rhs);
|
||||
EXPORT mcsv1Context& copy(const mcsv1Context& rhs);
|
||||
|
||||
private:
|
||||
|
||||
uint64_t fRunFlags; // Set by the user to define the type of UDA(n)F
|
||||
uint64_t fContextFlags; // Set by the framework to define this specific call.
|
||||
int32_t fUserDataSize;
|
||||
boost::shared_ptr<UserData> fUserData;
|
||||
CalpontSystemCatalog::ColDataType fResultType;
|
||||
int32_t fColWidth; // The length in bytes of the return type
|
||||
int32_t fResultscale; // For scale, the number of digits to the right of the decimal
|
||||
int32_t fResultPrecision; // The max number of digits allowed in the decimal value
|
||||
std::string errorMsg;
|
||||
std::vector<uint32_t>* dataFlags; // one entry for each parameter
|
||||
bool* bInterrupted; // Gets set to true by the Framework if something happens
|
||||
uint64_t fRowsInPartition; // Only valid in reset()
|
||||
int64_t fRowCnt; // The number of rows involved in this aggregate.
|
||||
WF_FRAME fStartFrame; // Is set to default to start, then modified by the actual frame in the call
|
||||
WF_FRAME fEndFrame; // Is set to default to start, then modified by the actual frame in the call
|
||||
int32_t fStartConstant; // for start frame WF_PRECEEDIMG or WF_FOLLOWING
|
||||
int32_t fEndConstant; // for end frame WF_PRECEEDIMG or WF_FOLLOWING
|
||||
std::string functionName;
|
||||
mcsv1sdk::mcsv1_UDAF* func;
|
||||
|
||||
public:
|
||||
// For use by the framework
|
||||
EXPORT void serialize(messageqcpp::ByteStream& b) const;
|
||||
EXPORT void unserialize(messageqcpp::ByteStream& b);
|
||||
EXPORT void createUserData();
|
||||
EXPORT void setUserData(boost::shared_ptr<UserData> userData);
|
||||
EXPORT void setUserData(UserData* userData);
|
||||
EXPORT void setName(std::string name);
|
||||
EXPORT void setContextFlags(uint64_t flags);
|
||||
EXPORT void setContextFlag(uint64_t flag);
|
||||
EXPORT void clearContextFlag(uint64_t flag);
|
||||
EXPORT uint64_t getContextFlags() const;
|
||||
EXPORT uint32_t getUserDataSize() const;
|
||||
EXPORT std::vector<uint32_t>& getDataFlags();
|
||||
EXPORT void setDataFlags(std::vector<uint32_t>* flags);
|
||||
EXPORT void setInterrupted(bool interrupted);
|
||||
EXPORT void setInterrupted(bool* interrupted);
|
||||
EXPORT void setRowCnt(uint64_t cnt);
|
||||
EXPORT mcsv1sdk::mcsv1_UDAF* getFunction();
|
||||
EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const;
|
||||
EXPORT boost::shared_ptr<UserData> getUserDataSP();
|
||||
};
|
||||
|
||||
// Since aggregate functions can operate on any data type, we use the following structure
|
||||
// to define the input row data. To be type insensiteve, data is stored in type static_any::any.
|
||||
//
|
||||
// To access the data it must be type cast to the correct type using boost::any_cast.
|
||||
// example for int data:
|
||||
//
|
||||
// if (dataType == CalpontSystemCatalog::INT)
|
||||
// int myint = boost::any_cast<int>columnData;
|
||||
//
|
||||
// For multi-paramter aggregations, the colsIn vector of next_value()
|
||||
// contains the ordered set of row parameters.
|
||||
//
|
||||
// For char, varchar, text, varbinary and blob types, columnData will be std::string.
|
||||
struct ColumnDatum
|
||||
{
|
||||
CalpontSystemCatalog::ColDataType dataType; // defined in calpontsystemcatalog.h
|
||||
static_any::any columnData;
|
||||
uint32_t scale; // If dataType is a DECIMAL type
|
||||
uint32_t precision; // If dataType is a DECIMAL type
|
||||
ColumnDatum() : dataType(CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1){};
|
||||
};
|
||||
|
||||
// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or
|
||||
// User Defined Analytic Function (UDAnF).
|
||||
// These will be singleton classes, so don't put any instance
|
||||
// specific data in here. All instance data is stored in mcsv1Context
|
||||
// passed to each user function and retrieved by the getUserData() method.
|
||||
//
|
||||
// Each API function returns a ReturnCode. If ERROR is returned at any time,
|
||||
// the query is aborted, getInterrupted() will begin to return true and the
|
||||
// message set in config->setErrorMessage() is returned to MariaDB.
|
||||
class mcsv1_UDAF
|
||||
{
|
||||
public:
|
||||
enum ReturnCode
|
||||
{
|
||||
ERROR = 0,
|
||||
SUCCESS = 1,
|
||||
NOT_IMPLEMENTED = 2 // User UDA(n)F shouldn't return this
|
||||
};
|
||||
// Defaults OK
|
||||
mcsv1_UDAF(){};
|
||||
virtual ~mcsv1_UDAF(){};
|
||||
|
||||
/**
|
||||
* init()
|
||||
*
|
||||
* Mandatory. Implement this to initialize flags and instance
|
||||
* data. Called once per SQL statement. You can do any sanity
|
||||
* checks here.
|
||||
*
|
||||
* colTypes (in) - A vector of ColDataType defining the
|
||||
* parameters of the UDA(n)F call. These can be used to decide
|
||||
* to override the default return type. If desired, the new
|
||||
* return type can be set by context->setReturnType() and
|
||||
* decimal scale and precision can be set by context->setScale
|
||||
* and context->setPrecision respectively.
|
||||
*
|
||||
* Return mcsv1_UDAF::ERROR on any error, such as non-compatible
|
||||
* colTypes or wrong number of arguments. Else return
|
||||
* mcsv1_UDAF::SUCCESS.
|
||||
*/
|
||||
virtual ReturnCode init(mcsv1Context* context,
|
||||
COL_TYPES& colTypes) = 0;
|
||||
|
||||
/**
|
||||
* finish()
|
||||
*
|
||||
* Mandatory. Completes the UDA(n)F. Called once per SQL
|
||||
* statement. Do not free any memory allocated by
|
||||
* createUserData(). The SDK Framework owns that memory
|
||||
* and will handle that. Often, there is nothing to do here.
|
||||
*/
|
||||
virtual ReturnCode finish(mcsv1Context* context) = 0;
|
||||
|
||||
/**
|
||||
* reset()
|
||||
*
|
||||
* Mandatory. Reset the UDA(n)F for a new group, partition or,
|
||||
* in some cases, new Window Frame. Do not free any memory
|
||||
* allocated by createUserData(). The SDK Framework owns
|
||||
* that memory and will handle that. Use this opportunity to
|
||||
* reset any variables in context->getUserData() needed for the
|
||||
* next aggregation. May be called multiple times if running in
|
||||
* a ditributed fashion.
|
||||
*
|
||||
* Use this opportunity to initialize the userData.
|
||||
*/
|
||||
virtual ReturnCode reset(mcsv1Context* context) = 0;
|
||||
|
||||
/**
|
||||
* nextValue()
|
||||
*
|
||||
* Mandatory. Handle a single row.
|
||||
*
|
||||
* colsIn - A vector of data structure describing the input
|
||||
* data.
|
||||
*
|
||||
* This function is called once for every row in the filtered
|
||||
* result set (before aggregation). It is very important that
|
||||
* this function is efficient.
|
||||
*
|
||||
* If the UDAF is running in a distributed fashion, nextValue
|
||||
* cannot depend on order, as it will only be called for each
|
||||
* row found on the specific PM.
|
||||
*
|
||||
* valsIn (in) - a vector of the parameters from the row.
|
||||
*/
|
||||
virtual ReturnCode nextValue(mcsv1Context* context,
|
||||
std::vector<ColumnDatum>& valsIn) = 0;
|
||||
|
||||
/**
|
||||
* subEvaluate()
|
||||
*
|
||||
* Mandatory -- Called if the UDAF is running in a distributed
|
||||
* fashion. Columnstore tries to run all aggregate functions
|
||||
* distributed, depending on context.
|
||||
*
|
||||
* Perform an aggregation on rows partially aggregated by
|
||||
* nextValue. Columnstore calls nextValue for each row on a
|
||||
* given PM for a group (GROUP BY). subEvaluate is called on the
|
||||
* UM to consolodate those values into a single instance of
|
||||
* userData. Keep your aggregated totals in context's userData.
|
||||
* The first time this is called for a group, reset() would have
|
||||
* been called with this version of userData.
|
||||
*
|
||||
* Called for every partial data set in each group in GROUP BY.
|
||||
*
|
||||
* When subEvaluate has been called for all subAggregated data
|
||||
* sets, Evaluate will be called with the same context as here.
|
||||
*
|
||||
* valIn (In) - This is a pointer to a UserData class with the
|
||||
* partially aggregated values. It will contain the value of
|
||||
* userData as seen in the last call to NextValue for a given
|
||||
* PM.
|
||||
*
|
||||
*/
|
||||
virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn) = 0;
|
||||
|
||||
/**
|
||||
* evaluate()
|
||||
*
|
||||
* Mandatory. Get the aggregated value.
|
||||
*
|
||||
* Called for every new group if UDAF GROUP BY, UDAnF partition
|
||||
* or, in some cases, new Window Frame.
|
||||
*
|
||||
* Set the aggregated value into valOut. The datatype is assumed
|
||||
* to be the same as that set in the init() function;
|
||||
*
|
||||
* If the UDAF is running in a distributed fashion, evaluate is
|
||||
* called after a series of subEvaluate calls.
|
||||
*
|
||||
* valOut (out) - Set the aggregated value here. The datatype is
|
||||
* assumed to be the same as that set in the init() function;
|
||||
*
|
||||
* To return a NULL value, don't assign to valOut.
|
||||
*/
|
||||
virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut) = 0;
|
||||
|
||||
/**
|
||||
* dropValue()
|
||||
*
|
||||
* Optional -- If defined, the server will call this instead of
|
||||
* reset for UDAnF.
|
||||
*
|
||||
* Don't implement if a UDAnF has one or more of the following:
|
||||
* The UDAnF can't be used with a Window Frame
|
||||
* The UDAnF is not reversable in some way
|
||||
* The UDAnF is not interested in optimal performance
|
||||
*
|
||||
* If not implemented, reset() followed by a series of
|
||||
* nextValue() will be called for each movement of the Window
|
||||
* Frame.
|
||||
*
|
||||
* If implemented, then each movement of the Window Frame will
|
||||
* result in dropValue() being called for each row falling out
|
||||
* of the Frame and nextValue() being called for each new row
|
||||
* coming into the Frame.
|
||||
*
|
||||
* valsDropped (in) - a vector of the parameters from the row
|
||||
* leaving the Frame
|
||||
*
|
||||
* dropValue() will not be called for unbounded/current row type
|
||||
* frames, as those are already optimized.
|
||||
*/
|
||||
virtual ReturnCode dropValue(mcsv1Context* context,
|
||||
std::vector<ColumnDatum>& valsDropped);
|
||||
|
||||
/**
|
||||
* createUserData()
|
||||
*
|
||||
* Optional -- The default is to create a data byte array of
|
||||
* size as set in context->setUserDataSize()
|
||||
*
|
||||
* Create your variable length data structure via
|
||||
* userData = new <UserData_type>
|
||||
*
|
||||
* The data structure may contain references to containers or
|
||||
* pointers to other objects. Remember that for distributed
|
||||
* processing, this may be called multiple times for variaous
|
||||
* computing blocks. At the least, it will be called once per PM
|
||||
* that processes the data, and once more for the UM. For UDAnF,
|
||||
* it may only be called once.
|
||||
*
|
||||
* Set length to the base length of the data structure you
|
||||
* create.
|
||||
*
|
||||
*/
|
||||
virtual ReturnCode createUserData(UserData*& userdata, int32_t& length);
|
||||
|
||||
protected:
|
||||
// These are handy for testing the actual type of static_any
|
||||
static const static_any::any& charTypeId;
|
||||
static const static_any::any& scharTypeId;
|
||||
static const static_any::any& shortTypeId;
|
||||
static const static_any::any& intTypeId;
|
||||
static const static_any::any& longTypeId;
|
||||
static const static_any::any& llTypeId;
|
||||
static const static_any::any& ucharTypeId;
|
||||
static const static_any::any& ushortTypeId;
|
||||
static const static_any::any& uintTypeId;
|
||||
static const static_any::any& ulongTypeId;
|
||||
static const static_any::any& ullTypeId;
|
||||
static const static_any::any& floatTypeId;
|
||||
static const static_any::any& doubleTypeId;
|
||||
static const static_any::any& strTypeId;
|
||||
};
|
||||
|
||||
/***********************************************************************
|
||||
* There is no user modifiable code past this point
|
||||
***********************************************************************/
|
||||
// Function definitions for mcsv1Context
|
||||
inline mcsv1Context::mcsv1Context() :
|
||||
fRunFlags(UDAF_OVER_ALLOWED | UDAF_ORDER_ALLOWED | UDAF_WINDOWFRAME_ALLOWED),
|
||||
fContextFlags(0),
|
||||
fUserDataSize(0),
|
||||
fResultType(CalpontSystemCatalog::UNDEFINED),
|
||||
fColWidth(0),
|
||||
fResultscale(0),
|
||||
fResultPrecision(18),
|
||||
dataFlags(NULL),
|
||||
bInterrupted(NULL),
|
||||
fRowsInPartition(0),
|
||||
fStartFrame(WF_UNBOUNDED_PRECEDING),
|
||||
fEndFrame(WF_CURRENT_ROW),
|
||||
fStartConstant(0),
|
||||
fEndConstant(0),
|
||||
func(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
inline mcsv1Context::mcsv1Context(const mcsv1Context& rhs) :
|
||||
fContextFlags(0),
|
||||
fColWidth(0),
|
||||
dataFlags(NULL),
|
||||
bInterrupted(NULL),
|
||||
func(NULL)
|
||||
{
|
||||
copy(rhs);
|
||||
}
|
||||
|
||||
inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs)
|
||||
{
|
||||
fRunFlags = rhs.getRunFlags();
|
||||
fResultType = rhs.getResultType();
|
||||
fUserDataSize = rhs.getUserDataSize();
|
||||
fResultscale = rhs.getScale();
|
||||
fResultPrecision = rhs.getPrecision();
|
||||
rhs.getStartFrame(fStartFrame, fStartConstant);
|
||||
rhs.getEndFrame(fEndFrame, fEndConstant);
|
||||
functionName = rhs.getName();
|
||||
bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline mcsv1Context::~mcsv1Context()
|
||||
{
|
||||
}
|
||||
|
||||
inline mcsv1Context& mcsv1Context::operator=(const mcsv1Context& rhs)
|
||||
{
|
||||
fContextFlags = 0;
|
||||
fColWidth = 0;
|
||||
dataFlags = NULL;
|
||||
bInterrupted = NULL;
|
||||
func = NULL;
|
||||
return copy(rhs);
|
||||
}
|
||||
|
||||
inline void mcsv1Context::setErrorMessage(std::string errmsg)
|
||||
{
|
||||
errorMsg = errmsg;
|
||||
}
|
||||
|
||||
inline const std::string& mcsv1Context::getErrorMessage() const
|
||||
{
|
||||
return errorMsg;
|
||||
}
|
||||
|
||||
inline uint64_t mcsv1Context::setRunFlags(uint64_t flags)
|
||||
{
|
||||
uint64_t f = fRunFlags;
|
||||
fRunFlags = flags;
|
||||
return f;
|
||||
}
|
||||
|
||||
inline uint64_t mcsv1Context::getRunFlags() const
|
||||
{
|
||||
return fRunFlags;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::setRunFlag(uint64_t flag)
|
||||
{
|
||||
bool b = fRunFlags & flag;
|
||||
fRunFlags |= flag;
|
||||
return b;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::getRunFlag(uint64_t flag)
|
||||
{
|
||||
return fRunFlags & flag;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::clearRunFlag(uint64_t flag)
|
||||
{
|
||||
bool b = fRunFlags & flag;
|
||||
fRunFlags &= ~flag;
|
||||
return b;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::toggleRunFlag(uint64_t flag)
|
||||
{
|
||||
bool b = fRunFlags & flag;
|
||||
fRunFlags ^= flag;
|
||||
return b;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::isAnalytic()
|
||||
{
|
||||
return fContextFlags & CONTEXT_IS_ANALYTIC;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::isWindowHasCurrentRow()
|
||||
{
|
||||
return fContextFlags & CONTEXT_HAS_CURRENT_ROW;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::isUM()
|
||||
{
|
||||
return !(fContextFlags & CONTEXT_IS_PM);
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::isPM()
|
||||
{
|
||||
return fContextFlags & CONTEXT_IS_PM;
|
||||
}
|
||||
|
||||
inline size_t mcsv1Context::getParameterCount() const
|
||||
{
|
||||
if (dataFlags)
|
||||
return dataFlags->size();
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::isParamNull(int paramIdx)
|
||||
{
|
||||
if (dataFlags)
|
||||
return (*dataFlags)[paramIdx] & PARAM_IS_NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::isParamConstant(int paramIdx)
|
||||
{
|
||||
if (dataFlags)
|
||||
return (*dataFlags)[paramIdx] & PARAM_IS_CONSTANT;
|
||||
return false;
|
||||
}
|
||||
|
||||
inline CalpontSystemCatalog::ColDataType mcsv1Context::getResultType() const
|
||||
{
|
||||
return fResultType;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::setResultType(CalpontSystemCatalog::ColDataType resultType)
|
||||
{
|
||||
fResultType = resultType;
|
||||
return true; // We may want to sanity check here.
|
||||
}
|
||||
|
||||
inline int32_t mcsv1Context::getScale() const
|
||||
{
|
||||
return fResultscale;
|
||||
}
|
||||
|
||||
inline int32_t mcsv1Context::getPrecision() const
|
||||
{
|
||||
return fResultPrecision;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::setScale(int32_t scale)
|
||||
{
|
||||
fResultscale = scale;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::setPrecision(int32_t precision)
|
||||
{
|
||||
fResultPrecision = precision;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::setColWidth(int32_t colWidth)
|
||||
{
|
||||
fColWidth = colWidth;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline void mcsv1Context::setInterrupted(bool interrupted)
|
||||
{
|
||||
if (bInterrupted)
|
||||
{
|
||||
*bInterrupted = interrupted;
|
||||
}
|
||||
}
|
||||
|
||||
inline void mcsv1Context::setInterrupted(bool* interrupted)
|
||||
{
|
||||
bInterrupted = interrupted;
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::getInterrupted() const
|
||||
{
|
||||
if (bInterrupted)
|
||||
{
|
||||
return bInterrupted;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline uint64_t mcsv1Context::getRowsInPartition() const
|
||||
{
|
||||
return fRowsInPartition;
|
||||
}
|
||||
|
||||
inline uint64_t mcsv1Context::getRowCnt() const
|
||||
{
|
||||
return fRowCnt;
|
||||
}
|
||||
|
||||
inline void mcsv1Context::setUserDataSize(int bytes)
|
||||
{
|
||||
fUserDataSize = bytes;
|
||||
}
|
||||
|
||||
inline UserData* mcsv1Context::getUserData()
|
||||
{
|
||||
if (!fUserData)
|
||||
{
|
||||
createUserData();
|
||||
}
|
||||
return fUserData.get();
|
||||
}
|
||||
|
||||
inline boost::shared_ptr<UserData> mcsv1Context::getUserDataSP()
|
||||
{
|
||||
if (!fUserData)
|
||||
{
|
||||
createUserData();
|
||||
}
|
||||
return fUserData;
|
||||
}
|
||||
|
||||
inline void mcsv1Context::setUserData(boost::shared_ptr<UserData> userData)
|
||||
{
|
||||
fUserData = userData;
|
||||
}
|
||||
|
||||
inline void mcsv1Context::setUserData(UserData* userData)
|
||||
{
|
||||
if (userData)
|
||||
{
|
||||
fUserData.reset(userData);
|
||||
}
|
||||
else
|
||||
{
|
||||
fUserData.reset();
|
||||
}
|
||||
}
|
||||
|
||||
inline bool mcsv1Context::setDefaultWindowFrame(WF_FRAME defaultStartFrame,
|
||||
WF_FRAME defaultEndFrame,
|
||||
int32_t startConstant,
|
||||
int32_t endConstant)
|
||||
{
|
||||
// TODO: Add sanity checks
|
||||
fStartFrame = defaultStartFrame;
|
||||
fEndFrame = defaultEndFrame;
|
||||
fStartConstant = startConstant;
|
||||
fEndConstant = endConstant;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline void mcsv1Context::getStartFrame(WF_FRAME& startFrame, int32_t& startConstant) const
|
||||
{
|
||||
startFrame = fStartFrame;
|
||||
startConstant = fStartConstant;
|
||||
}
|
||||
|
||||
inline void mcsv1Context::getEndFrame(WF_FRAME& endFrame, int32_t& endConstant) const
|
||||
{
|
||||
endFrame = fEndFrame;
|
||||
endConstant = fEndConstant;
|
||||
}
|
||||
|
||||
inline const std::string& mcsv1Context::getName() const
|
||||
{
|
||||
return functionName;
|
||||
}
|
||||
|
||||
inline void mcsv1Context::setName(std::string name)
|
||||
{
|
||||
functionName = name;
|
||||
}
|
||||
|
||||
inline void mcsv1Context::setRowCnt(uint64_t cnt)
|
||||
{
|
||||
fRowCnt = cnt;
|
||||
}
|
||||
|
||||
inline uint64_t mcsv1Context::getContextFlags() const
|
||||
{
|
||||
return fContextFlags;
|
||||
}
|
||||
|
||||
inline void mcsv1Context::setContextFlags(uint64_t flags)
|
||||
{
|
||||
fContextFlags = flags;
|
||||
}
|
||||
|
||||
inline void mcsv1Context::setContextFlag(uint64_t flag)
|
||||
{
|
||||
fContextFlags |= flag;
|
||||
}
|
||||
|
||||
inline void mcsv1Context::clearContextFlag(uint64_t flag)
|
||||
{
|
||||
fContextFlags &= ~flag;
|
||||
}
|
||||
|
||||
inline uint32_t mcsv1Context::getUserDataSize() const
|
||||
{
|
||||
return fUserDataSize;
|
||||
}
|
||||
|
||||
inline std::vector<uint32_t>& mcsv1Context::getDataFlags()
|
||||
{
|
||||
return *dataFlags;
|
||||
}
|
||||
|
||||
inline void mcsv1Context::setDataFlags(std::vector<uint32_t>* flags)
|
||||
{
|
||||
dataFlags = flags;
|
||||
}
|
||||
|
||||
inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context,
|
||||
std::vector<ColumnDatum>& valsDropped)
|
||||
{
|
||||
return NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::createUserData(UserData*& userData, int32_t& length)
|
||||
{
|
||||
userData = new UserData(length);
|
||||
userData->size = length;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
}; // namespace mcssdk
|
||||
|
||||
#undef EXPORT
|
||||
|
||||
#endif // HEADER_mcsv1_udaf.h
|
||||
|
Reference in New Issue
Block a user