You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-10-30 07:25:34 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			285 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			C++
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			285 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			C++
		
	
	
		
			Executable File
		
	
	
	
	
| /* Copyright (C) 2017 MariaDB Corporation
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or
 | |
|    modify it under the terms of the GNU General Public License
 | |
|    as published by the Free Software Foundation; version 2 of
 | |
|    the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | |
|    MA 02110-1301, USA. */
 | |
| 
 | |
| /***********************************************************************
 | |
| *   $Id$
 | |
| *
 | |
| *   mcsv1_UDAF.h
 | |
| ***********************************************************************/
 | |
| 
 | |
| /** 
 | |
|  * Columnstore interface for writing a User Defined Aggregate 
 | |
|  * Functions (UDAF) and User Defined Analytic Functions (UDAnF) 
 | |
|  * or a function that can act as either - UDA(n)F 
 | |
|  *  
 | |
|  * The basic steps are:
 | |
|  *
 | |
|  * 1. Create the UDA(n)F function interface in some .h file.
 | |
|  * 2. Create the UDA(n)F function implementation in some .cpp file.
 | |
|  * 3. Create the connector stub (MariaDB UDAF definition) for
 | |
|  * this UDA(n)F.
 | |
|  * 4. Build the dynamic library using all of the source.
 | |
|  * 5. Put the library in $COLUMNSTORE_INSTALL/lib of
 | |
|  * all modules.
 | |
|  * 6. Restart the Columnstore system.
 | |
|  * 7. Notify mysqld about the new function:
 | |
|  *  
 | |
|  *    CREATE AGGREGATE FUNCTION avg_mode returns REAL soname
 | |
|  *    'libudf_mysql.so';
 | |
|  *  
 | |
|  * The UDAF functions may run distributed in the Columnstore 
 | |
|  * engine. UDAnF do not run distributed. 
 | |
|  *  
 | |
|  * UDAF is User Defined Aggregate Function. 
 | |
|  * UDAnF is User Defined Analytic Function. 
 | |
|  * UDA(n)F is an acronym for a function that could be either. It 
 | |
|  * is also used to describe the interface that is used for 
 | |
|  * either. 
 | |
|  */
 | |
| #ifndef HEADER_mode
 | |
| #define HEADER_mode
 | |
| 
 | |
| #include <cstdlib>
 | |
| #include <string>
 | |
| #include <vector>
 | |
| #include <boost/any.hpp>
 | |
| #ifdef _MSC_VER
 | |
| #include <unordered_map>
 | |
| #else
 | |
| #include <tr1/unordered_map>
 | |
| #endif
 | |
| 
 | |
| #include "mcsv1_udaf.h"
 | |
| #include "calpontsystemcatalog.h"
 | |
| #include "windowfunctioncolumn.h"
 | |
| using namespace execplan;
 | |
| 
 | |
| #if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
 | |
| #define EXPORT __declspec(dllexport)
 | |
| #else
 | |
| #define EXPORT
 | |
| #endif
 | |
| 
 | |
| namespace mcsv1sdk
 | |
| {
 | |
| 
 | |
| #define DATATYPE double
 | |
| typedef std::tr1::unordered_map<DATATYPE, uint32_t> MODE_DATA;
 | |
| 
 | |
| // Override UserData for data storage
 | |
| struct ModeData : public UserData
 | |
| {
 | |
| 	ModeData() {};
 | |
| 
 | |
| 	virtual ~ModeData(){}
 | |
| 
 | |
| 	virtual void serialize(messageqcpp::ByteStream& bs) const;
 | |
| 	virtual void unserialize(messageqcpp::ByteStream& bs);
 | |
| 
 | |
| 	MODE_DATA mData;
 | |
| private:
 | |
| 	// For now, copy construction is unwanted
 | |
| 	ModeData(UserData&);
 | |
| };
 | |
| 
 | |
| // Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or 
 | |
| // User Defined Analytic Function (UDAnF).
 | |
| // These will be singleton classes, so don't put any instance
 | |
| // specific data in here. All instance data is stored in mcsv1Context
 | |
| // passed to each user function and retrieved by the getUserData() method.
 | |
| // 
 | |
| // Each API function returns a ReturnCode. If ERROR is returned at any time,
 | |
| // the query is aborted, getInterrupted() will begin to return true and the
 | |
| // message set in context->setErrorMessage() is returned to MariaDB.
 | |
| 
 | |
| // Return the avg_mode value of the dataset
 | |
| 
 | |
| class avg_mode : public  mcsv1_UDAF
 | |
| {
 | |
| public:
 | |
| 	// Defaults OK
 | |
| 	avg_mode() : mcsv1_UDAF(){};
 | |
| 	virtual ~avg_mode(){};
 | |
| 
 | |
| 	/** 
 | |
| 	 * init() 
 | |
| 	 *  
 | |
| 	 * Mandatory. Implement this to initialize flags and instance 
 | |
| 	 * data. Called once per SQL statement. You can do any sanity 
 | |
| 	 * checks here. 
 | |
| 	 *  
 | |
| 	 * colTypes (in) - A vector of ColDataType defining the 
 | |
| 	 * parameters of the UDA(n)F call. These can be used to decide 
 | |
| 	 * to override the default return type. If desired, the new 
 | |
| 	 * return type can be set by context->setReturnType() and 
 | |
| 	 * decimal scale and precision can be set by context->setScale 
 | |
| 	 * and context->setPrecision respectively. 
 | |
| 	 *  
 | |
| 	 * Return mcsv1_UDAF::ERROR on any error, such as non-compatible
 | |
| 	 * colTypes or wrong number of arguments. Else return 
 | |
| 	 * mcsv1_UDAF::SUCCESS. 
 | |
| 	 */
 | |
| 	virtual ReturnCode init(mcsv1Context* context,
 | |
| 							COL_TYPES& colTypes);
 | |
| 
 | |
| 	/** 
 | |
| 	 * reset() 
 | |
| 	 *  
 | |
| 	 * Mandatory. Reset the UDA(n)F for a new group, partition or, 
 | |
| 	 * in some cases, new Window Frame. Do not free any memory 
 | |
| 	 * allocated by context->setUserDataSize(). The SDK Framework owns 
 | |
| 	 * that memory and will handle that. Use this opportunity to 
 | |
| 	 * reset any variables in context->getUserData() needed for the 
 | |
| 	 * next aggregation. May be called multiple times if running in 
 | |
| 	 * a ditributed fashion. 
 | |
| 	 *  
 | |
| 	 * Use this opportunity to initialize the userData.
 | |
| 	 */
 | |
| 	virtual ReturnCode reset(mcsv1Context* context);
 | |
| 
 | |
| 	/** 
 | |
| 	 * nextValue() 
 | |
| 	 *  
 | |
| 	 * Mandatory. Handle a single row. 
 | |
| 	 *  
 | |
| 	 * colsIn - A vector of data structure describing the input 
 | |
| 	 * data. 
 | |
| 	 *  
 | |
| 	 * This function is called once for every row in the filtered 
 | |
| 	 * result set (before aggregation). It is very important that 
 | |
| 	 * this function is efficient. 
 | |
| 	 *  
 | |
| 	 * If the UDAF is running in a distributed fashion, nextValue 
 | |
| 	 * cannot depend on order, as it will only be called for each 
 | |
| 	 * row found on the specific PM. 
 | |
| 	 *  
 | |
| 	 * valsIn (in) - a vector of the parameters from the row.
 | |
| 	 */
 | |
| 	virtual ReturnCode nextValue(mcsv1Context* context, 
 | |
| 								 std::vector<ColumnDatum>& valsIn);
 | |
| 
 | |
| 	 /** 
 | |
| 	  * subEvaluate() 
 | |
| 	  *  
 | |
| 	  * Mandatory -- Called if the UDAF is running in a distributed 
 | |
| 	  * fashion. Columnstore tries to run all aggregate functions 
 | |
| 	  * distributed, depending on context. 
 | |
| 	  *  
 | |
| 	  * Perform an aggregation on rows partially aggregated by 
 | |
| 	  * nextValue. Columnstore calls nextValue for each row on a 
 | |
| 	  * given PM for a group (GROUP BY). subEvaluate is called on the
 | |
| 	  * UM to consolodate those values into a single instance of 
 | |
| 	  * userData. Keep your aggregated totals in context's userData. 
 | |
| 	  * The first time this is called for a group, reset() would have 
 | |
| 	  * been called with this version of userData. 
 | |
| 	  *  
 | |
| 	  * Called for every partial data set in each group in GROUP BY.
 | |
| 	  *  
 | |
| 	  * When subEvaluate has been called for all subAggregated data 
 | |
| 	  * sets, Evaluate will be called with the same context as here.
 | |
| 	  *  
 | |
| 	  * valIn (In) - This is a pointer to a memory block of the size 
 | |
| 	  * set in setUserDataSize. It will contain the value of userData 
 | |
| 	  * as seen in the last call to NextValue for a given PM.
 | |
| 	  *  
 | |
| 	  */
 | |
| 	 virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
 | |
| 
 | |
| 	/** 
 | |
| 	 * evaluate() 
 | |
| 	 *  
 | |
| 	 * Mandatory. Get the aggregated value.
 | |
| 	 *  
 | |
| 	 * Called for every new group if UDAF GROUP BY, UDAnF partition 
 | |
| 	 * or, in some cases, new Window Frame. 
 | |
| 	 *  
 | |
| 	 * Set the aggregated value into valOut. The datatype is assumed
 | |
| 	 * to be the same as that set in the init() function; 
 | |
| 	 *  
 | |
| 	 * If the UDAF is running in a distributed fashion, evaluate is 
 | |
| 	 * called after a series of subEvaluate calls. 
 | |
| 	 *  
 | |
| 	 * valOut (out) - Set the aggregated value here. The datatype is
 | |
| 	 * assumed to be the same as that set in the init() function; 
 | |
| 	 *  
 | |
| 	 * To return a NULL value, don't assign to valOut.
 | |
| 	 */
 | |
| 	virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
 | |
| 
 | |
| 	/** 
 | |
| 	 * dropValue() 
 | |
| 	 *  
 | |
| 	 * Optional -- If defined, the server will call this instead of 
 | |
| 	 * reset for UDAnF. 
 | |
| 	 *  
 | |
| 	 * Don't implement if a UDAnF has one or more of the following: 
 | |
| 	 * The UDAnF can't be used with a Window Frame
 | |
| 	 * The UDAnF is not reversable in some way 
 | |
| 	 * The UDAnF is not interested in optimal performance 
 | |
| 	 *  
 | |
| 	 * If not implemented, reset() followed by a series of 
 | |
| 	 * nextValue() will be called for each movement of the Window 
 | |
| 	 * Frame. 
 | |
| 	 *  
 | |
| 	 * If implemented, then each movement of the Window Frame will 
 | |
| 	 * result in dropValue() being called for each row falling out 
 | |
| 	 * of the Frame and nextValue() being called for each new row 
 | |
| 	 * coming into the Frame. 
 | |
| 	 *  
 | |
| 	 * valsDropped (in) - a vector of the parameters from the row 
 | |
| 	 * leaving the Frame 
 | |
| 	 *  
 | |
| 	 * dropValue() will not be called for unbounded/current row type
 | |
| 	 * frames, as those are already optimized. 
 | |
| 	 */
 | |
| 	virtual ReturnCode dropValue(mcsv1Context* context, 
 | |
| 								 std::vector<ColumnDatum>& valsDropped);
 | |
| 
 | |
| 	/** 
 | |
| 	 * createUserData()
 | |
| 	 *  
 | |
| 	 * Optional -- If defined, the server will call this instead of 
 | |
| 	 * createUserData on context. 
 | |
| 	 *  
 | |
| 	 * Create your variable length data structure via 
 | |
| 	 * data = new <datatype> 
 | |
| 	 *  
 | |
| 	 * The data structure may contain references to containers or 
 | |
| 	 * pointers to other objects. Remember that for distributed 
 | |
| 	 * processing, this may be called multiple times for variaous 
 | |
| 	 * computing blocks. At the least, it will be called once per PM 
 | |
| 	 * that processes the data, and once more for the UM. For UDAnF, 
 | |
| 	 * it may only be called once. 
 | |
| 	 *  
 | |
| 	 * Set length to the length of the data structure you create.
 | |
| 	 *  
 | |
| 	 * For each call to createUserData(), there will be a 
 | |
| 	 * corresponding deleteUserData() where you must clean up. Any 
 | |
| 	 * memory leaks are your fault. 
 | |
| 	 *  
 | |
| 	 */ 
 | |
| 	virtual ReturnCode createUserData(UserData*& data, int32_t& length);
 | |
| protected:
 | |
| };
 | |
| 
 | |
| };  // namespace
 | |
| 
 | |
| #undef EXPORT
 | |
| 
 | |
| #endif // HEADER_mode
 | |
| 
 |