You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
Reformat all code to coding standard
This commit is contained in:
380
utils/udfsdk/median.h
Executable file → Normal file
380
utils/udfsdk/median.h
Executable file → Normal file
@ -21,34 +21,34 @@
|
||||
* mcsv1_UDAF.h
|
||||
***********************************************************************/
|
||||
|
||||
/**
|
||||
* Columnstore interface for writing a User Defined Aggregate
|
||||
* Functions (UDAF) and User Defined Analytic Functions (UDAnF)
|
||||
* or a function that can act as either - UDA(n)F
|
||||
*
|
||||
/**
|
||||
* Columnstore interface for writing a User Defined Aggregate
|
||||
* Functions (UDAF) and User Defined Analytic Functions (UDAnF)
|
||||
* or a function that can act as either - UDA(n)F
|
||||
*
|
||||
* The basic steps are:
|
||||
*
|
||||
* 1. Create a the UDA(n)F function interface in some .h file.
|
||||
* 2. Create the UDF function implementation in some .cpp file
|
||||
* 3. Create the connector stub (MariaDB UDAF definition) for
|
||||
* this UDF function.
|
||||
* 4. build the dynamic library using all of the source.
|
||||
* 5 Put the library in $COLUMNSTORE_INSTALL/lib of
|
||||
* all modules
|
||||
* 6. restart the Columnstore system.
|
||||
* 1. Create a the UDA(n)F function interface in some .h file.
|
||||
* 2. Create the UDF function implementation in some .cpp file
|
||||
* 3. Create the connector stub (MariaDB UDAF definition) for
|
||||
* this UDF function.
|
||||
* 4. build the dynamic library using all of the source.
|
||||
* 5 Put the library in $COLUMNSTORE_INSTALL/lib of
|
||||
* all modules
|
||||
* 6. restart the Columnstore system.
|
||||
* 7. notify mysqld about the new function:
|
||||
*
|
||||
*
|
||||
* CREATE AGGREGATE FUNCTION median returns REAL soname
|
||||
* 'libudf_mysql.so';
|
||||
*
|
||||
* The UDAF functions may run distributed in the Columnstore
|
||||
* engine. UDAnF do not run distributed.
|
||||
*
|
||||
* UDAF is User Defined Aggregate Function.
|
||||
* UDAnF is User Defined Analytic Function.
|
||||
* UDA(n)F is an acronym for a function that could be either. It
|
||||
* is also used to describe the interface that is used for
|
||||
* either.
|
||||
*
|
||||
* The UDAF functions may run distributed in the Columnstore
|
||||
* engine. UDAnF do not run distributed.
|
||||
*
|
||||
* UDAF is User Defined Aggregate Function.
|
||||
* UDAnF is User Defined Analytic Function.
|
||||
* UDA(n)F is an acronym for a function that could be either. It
|
||||
* is also used to describe the interface that is used for
|
||||
* either.
|
||||
*/
|
||||
#ifndef HEADER_median
|
||||
#define HEADER_median
|
||||
@ -83,196 +83,196 @@ typedef std::map<DATATYPE, uint32_t> MEDIAN_DATA;
|
||||
// Override UserData for data storage
|
||||
struct MedianData : public UserData
|
||||
{
|
||||
MedianData() {};
|
||||
MedianData() {};
|
||||
|
||||
virtual ~MedianData(){}
|
||||
virtual ~MedianData() {}
|
||||
|
||||
virtual void serialize(messageqcpp::ByteStream& bs) const;
|
||||
virtual void unserialize(messageqcpp::ByteStream& bs);
|
||||
virtual void serialize(messageqcpp::ByteStream& bs) const;
|
||||
virtual void unserialize(messageqcpp::ByteStream& bs);
|
||||
|
||||
MEDIAN_DATA mData;
|
||||
MEDIAN_DATA mData;
|
||||
private:
|
||||
// For now, copy construction is unwanted
|
||||
MedianData(UserData&);
|
||||
// For now, copy construction is unwanted
|
||||
MedianData(UserData&);
|
||||
};
|
||||
|
||||
// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or
|
||||
// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or
|
||||
// User Defined Analytic Function (UDAnF).
|
||||
// These will be singleton classes, so don't put any instance
|
||||
// specific data in here. All instance data is stored in mcsv1Context
|
||||
// passed to each user function and retrieved by the getUserData() method.
|
||||
//
|
||||
// Each API function returns a ReturnCode. If ERROR is returned at any time,
|
||||
// the query is aborted, getInterrupted() will begin to return true and the
|
||||
// message set in config->setErrorMessage() is returned to MariaDB.
|
||||
//
|
||||
// Each API function returns a ReturnCode. If ERROR is returned at any time,
|
||||
// the query is aborted, getInterrupted() will begin to return true and the
|
||||
// message set in config->setErrorMessage() is returned to MariaDB.
|
||||
|
||||
// Return the median value of the dataset
|
||||
|
||||
class median : public mcsv1_UDAF
|
||||
{
|
||||
public:
|
||||
// Defaults OK
|
||||
median() : mcsv1_UDAF(){};
|
||||
virtual ~median(){};
|
||||
// Defaults OK
|
||||
median() : mcsv1_UDAF() {};
|
||||
virtual ~median() {};
|
||||
|
||||
/**
|
||||
* init()
|
||||
*
|
||||
* Mandatory. Implement this to initialize flags and instance
|
||||
* data. Called once per SQL statement. You can do any sanity
|
||||
* checks here.
|
||||
*
|
||||
* colTypes (in) - A vector of ColDataType defining the
|
||||
* parameters of the UDA(n)F call. These can be used to decide
|
||||
* to override the default return type. If desired, the new
|
||||
* return type can be set by context->setReturnType() and
|
||||
* decimal scale and precision can be set by context->setScale
|
||||
* and context->setPrecision respectively.
|
||||
*
|
||||
* Return mcsv1_UDAF::ERROR on any error, such as non-compatible
|
||||
* colTypes or wrong number of arguments. Else return
|
||||
* mcsv1_UDAF::SUCCESS.
|
||||
*/
|
||||
virtual ReturnCode init(mcsv1Context* context,
|
||||
COL_TYPES& colTypes);
|
||||
/**
|
||||
* init()
|
||||
*
|
||||
* Mandatory. Implement this to initialize flags and instance
|
||||
* data. Called once per SQL statement. You can do any sanity
|
||||
* checks here.
|
||||
*
|
||||
* colTypes (in) - A vector of ColDataType defining the
|
||||
* parameters of the UDA(n)F call. These can be used to decide
|
||||
* to override the default return type. If desired, the new
|
||||
* return type can be set by context->setReturnType() and
|
||||
* decimal scale and precision can be set by context->setScale
|
||||
* and context->setPrecision respectively.
|
||||
*
|
||||
* Return mcsv1_UDAF::ERROR on any error, such as non-compatible
|
||||
* colTypes or wrong number of arguments. Else return
|
||||
* mcsv1_UDAF::SUCCESS.
|
||||
*/
|
||||
virtual ReturnCode init(mcsv1Context* context,
|
||||
COL_TYPES& colTypes);
|
||||
|
||||
/**
|
||||
* reset()
|
||||
*
|
||||
* Mandatory. Reset the UDA(n)F for a new group, partition or,
|
||||
* in some cases, new Window Frame. Do not free any memory
|
||||
* allocated by context->setUserDataSize(). The SDK Framework owns
|
||||
* that memory and will handle that. Use this opportunity to
|
||||
* reset any variables in context->getUserData() needed for the
|
||||
* next aggregation. May be called multiple times if running in
|
||||
* a ditributed fashion.
|
||||
*
|
||||
* Use this opportunity to initialize the userData.
|
||||
*/
|
||||
virtual ReturnCode reset(mcsv1Context* context);
|
||||
/**
|
||||
* reset()
|
||||
*
|
||||
* Mandatory. Reset the UDA(n)F for a new group, partition or,
|
||||
* in some cases, new Window Frame. Do not free any memory
|
||||
* allocated by context->setUserDataSize(). The SDK Framework owns
|
||||
* that memory and will handle that. Use this opportunity to
|
||||
* reset any variables in context->getUserData() needed for the
|
||||
* next aggregation. May be called multiple times if running in
|
||||
* a ditributed fashion.
|
||||
*
|
||||
* Use this opportunity to initialize the userData.
|
||||
*/
|
||||
virtual ReturnCode reset(mcsv1Context* context);
|
||||
|
||||
/**
|
||||
* nextValue()
|
||||
*
|
||||
* Mandatory. Handle a single row.
|
||||
*
|
||||
* colsIn - A vector of data structure describing the input
|
||||
* data.
|
||||
*
|
||||
* This function is called once for every row in the filtered
|
||||
* result set (before aggregation). It is very important that
|
||||
* this function is efficient.
|
||||
*
|
||||
* If the UDAF is running in a distributed fashion, nextValue
|
||||
* cannot depend on order, as it will only be called for each
|
||||
* row found on the specific PM.
|
||||
*
|
||||
* valsIn (in) - a vector of the parameters from the row.
|
||||
*/
|
||||
virtual ReturnCode nextValue(mcsv1Context* context,
|
||||
std::vector<ColumnDatum>& valsIn);
|
||||
/**
|
||||
* nextValue()
|
||||
*
|
||||
* Mandatory. Handle a single row.
|
||||
*
|
||||
* colsIn - A vector of data structure describing the input
|
||||
* data.
|
||||
*
|
||||
* This function is called once for every row in the filtered
|
||||
* result set (before aggregation). It is very important that
|
||||
* this function is efficient.
|
||||
*
|
||||
* If the UDAF is running in a distributed fashion, nextValue
|
||||
* cannot depend on order, as it will only be called for each
|
||||
* row found on the specific PM.
|
||||
*
|
||||
* valsIn (in) - a vector of the parameters from the row.
|
||||
*/
|
||||
virtual ReturnCode nextValue(mcsv1Context* context,
|
||||
std::vector<ColumnDatum>& valsIn);
|
||||
|
||||
/**
|
||||
* subEvaluate()
|
||||
*
|
||||
* Mandatory -- Called if the UDAF is running in a distributed
|
||||
* fashion. Columnstore tries to run all aggregate functions
|
||||
* distributed, depending on context.
|
||||
*
|
||||
* Perform an aggregation on rows partially aggregated by
|
||||
* nextValue. Columnstore calls nextValue for each row on a
|
||||
* given PM for a group (GROUP BY). subEvaluate is called on the
|
||||
* UM to consolodate those values into a single instance of
|
||||
* userData. Keep your aggregated totals in context's userData.
|
||||
* The first time this is called for a group, reset() would have
|
||||
* been called with this version of userData.
|
||||
*
|
||||
* Called for every partial data set in each group in GROUP BY.
|
||||
*
|
||||
* When subEvaluate has been called for all subAggregated data
|
||||
* sets, Evaluate will be called with the same context as here.
|
||||
*
|
||||
* valIn (In) - This is a pointer to a memory block of the size
|
||||
* set in setUserDataSize. It will contain the value of userData
|
||||
* as seen in the last call to NextValue for a given PM.
|
||||
*
|
||||
*/
|
||||
virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
|
||||
/**
|
||||
* subEvaluate()
|
||||
*
|
||||
* Mandatory -- Called if the UDAF is running in a distributed
|
||||
* fashion. Columnstore tries to run all aggregate functions
|
||||
* distributed, depending on context.
|
||||
*
|
||||
* Perform an aggregation on rows partially aggregated by
|
||||
* nextValue. Columnstore calls nextValue for each row on a
|
||||
* given PM for a group (GROUP BY). subEvaluate is called on the
|
||||
* UM to consolodate those values into a single instance of
|
||||
* userData. Keep your aggregated totals in context's userData.
|
||||
* The first time this is called for a group, reset() would have
|
||||
* been called with this version of userData.
|
||||
*
|
||||
* Called for every partial data set in each group in GROUP BY.
|
||||
*
|
||||
* When subEvaluate has been called for all subAggregated data
|
||||
* sets, Evaluate will be called with the same context as here.
|
||||
*
|
||||
* valIn (In) - This is a pointer to a memory block of the size
|
||||
* set in setUserDataSize. It will contain the value of userData
|
||||
* as seen in the last call to NextValue for a given PM.
|
||||
*
|
||||
*/
|
||||
virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
|
||||
|
||||
/**
|
||||
* evaluate()
|
||||
*
|
||||
* Mandatory. Get the aggregated value.
|
||||
*
|
||||
* Called for every new group if UDAF GROUP BY, UDAnF partition
|
||||
* or, in some cases, new Window Frame.
|
||||
*
|
||||
* Set the aggregated value into valOut. The datatype is assumed
|
||||
* to be the same as that set in the init() function;
|
||||
*
|
||||
* If the UDAF is running in a distributed fashion, evaluate is
|
||||
* called after a series of subEvaluate calls.
|
||||
*
|
||||
* valOut (out) - Set the aggregated value here. The datatype is
|
||||
* assumed to be the same as that set in the init() function;
|
||||
*
|
||||
* To return a NULL value, don't assign to valOut.
|
||||
*/
|
||||
virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
|
||||
/**
|
||||
* evaluate()
|
||||
*
|
||||
* Mandatory. Get the aggregated value.
|
||||
*
|
||||
* Called for every new group if UDAF GROUP BY, UDAnF partition
|
||||
* or, in some cases, new Window Frame.
|
||||
*
|
||||
* Set the aggregated value into valOut. The datatype is assumed
|
||||
* to be the same as that set in the init() function;
|
||||
*
|
||||
* If the UDAF is running in a distributed fashion, evaluate is
|
||||
* called after a series of subEvaluate calls.
|
||||
*
|
||||
* valOut (out) - Set the aggregated value here. The datatype is
|
||||
* assumed to be the same as that set in the init() function;
|
||||
*
|
||||
* To return a NULL value, don't assign to valOut.
|
||||
*/
|
||||
virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
|
||||
|
||||
/**
|
||||
* dropValue()
|
||||
*
|
||||
* Optional -- If defined, the server will call this instead of
|
||||
* reset for UDAnF.
|
||||
*
|
||||
* Don't implement if a UDAnF has one or more of the following:
|
||||
* The UDAnF can't be used with a Window Frame
|
||||
* The UDAnF is not reversable in some way
|
||||
* The UDAnF is not interested in optimal performance
|
||||
*
|
||||
* If not implemented, reset() followed by a series of
|
||||
* nextValue() will be called for each movement of the Window
|
||||
* Frame.
|
||||
*
|
||||
* If implemented, then each movement of the Window Frame will
|
||||
* result in dropValue() being called for each row falling out
|
||||
* of the Frame and nextValue() being called for each new row
|
||||
* coming into the Frame.
|
||||
*
|
||||
* valsDropped (in) - a vector of the parameters from the row
|
||||
* leaving the Frame
|
||||
*
|
||||
* dropValue() will not be called for unbounded/current row type
|
||||
* frames, as those are already optimized.
|
||||
*/
|
||||
virtual ReturnCode dropValue(mcsv1Context* context,
|
||||
std::vector<ColumnDatum>& valsDropped);
|
||||
/**
|
||||
* dropValue()
|
||||
*
|
||||
* Optional -- If defined, the server will call this instead of
|
||||
* reset for UDAnF.
|
||||
*
|
||||
* Don't implement if a UDAnF has one or more of the following:
|
||||
* The UDAnF can't be used with a Window Frame
|
||||
* The UDAnF is not reversable in some way
|
||||
* The UDAnF is not interested in optimal performance
|
||||
*
|
||||
* If not implemented, reset() followed by a series of
|
||||
* nextValue() will be called for each movement of the Window
|
||||
* Frame.
|
||||
*
|
||||
* If implemented, then each movement of the Window Frame will
|
||||
* result in dropValue() being called for each row falling out
|
||||
* of the Frame and nextValue() being called for each new row
|
||||
* coming into the Frame.
|
||||
*
|
||||
* valsDropped (in) - a vector of the parameters from the row
|
||||
* leaving the Frame
|
||||
*
|
||||
* dropValue() will not be called for unbounded/current row type
|
||||
* frames, as those are already optimized.
|
||||
*/
|
||||
virtual ReturnCode dropValue(mcsv1Context* context,
|
||||
std::vector<ColumnDatum>& valsDropped);
|
||||
|
||||
/**
|
||||
* createUserData()
|
||||
*
|
||||
* Optional -- If defined, the server will call this instead of
|
||||
* createUserData on context.
|
||||
*
|
||||
* Create your variable length data structure via
|
||||
* data = new <datatype>
|
||||
*
|
||||
* The data structure may contain references to containers or
|
||||
* pointers to other objects. Remember that for distributed
|
||||
* processing, this may be called multiple times for variaous
|
||||
* computing blocks. At the least, it will be called once per PM
|
||||
* that processes the data, and once more for the UM. For UDAnF,
|
||||
* it may only be called once.
|
||||
*
|
||||
* Set length to the length of the data structure you create.
|
||||
*
|
||||
* For each call to createUserData(), there will be a
|
||||
* corresponding deleteUserData() where you must clean up. Any
|
||||
* memory leaks are your fault.
|
||||
*
|
||||
*/
|
||||
virtual ReturnCode createUserData(UserData*& data, int32_t& length);
|
||||
/**
|
||||
* createUserData()
|
||||
*
|
||||
* Optional -- If defined, the server will call this instead of
|
||||
* createUserData on context.
|
||||
*
|
||||
* Create your variable length data structure via
|
||||
* data = new <datatype>
|
||||
*
|
||||
* The data structure may contain references to containers or
|
||||
* pointers to other objects. Remember that for distributed
|
||||
* processing, this may be called multiple times for variaous
|
||||
* computing blocks. At the least, it will be called once per PM
|
||||
* that processes the data, and once more for the UM. For UDAnF,
|
||||
* it may only be called once.
|
||||
*
|
||||
* Set length to the length of the data structure you create.
|
||||
*
|
||||
* For each call to createUserData(), there will be a
|
||||
* corresponding deleteUserData() where you must clean up. Any
|
||||
* memory leaks are your fault.
|
||||
*
|
||||
*/
|
||||
virtual ReturnCode createUserData(UserData*& data, int32_t& length);
|
||||
protected:
|
||||
};
|
||||
|
||||
|
Reference in New Issue
Block a user