MCOL-523 Add UDAF and UDAnF SDK

2025-07-30 19:23:07 +03:00 · 2017-07-26 11:53:08 -05:00
parent 630b113565
commit bc2a4e7795
75 changed files with 10250 additions and 4523 deletions
--- a/utils/windowfunction/CMakeLists.txt
+++ b/utils/windowfunction/CMakeLists.txt
@ -21,7 +21,8 @@ set(windowfunction_LIB_SRCS
    wf_ranking.cpp
    wf_row_number.cpp
    wf_stats.cpp
-    wf_sum_avg.cpp)
+    wf_sum_avg.cpp
+    wf_udaf.cpp)

 add_library(windowfunction SHARED ${windowfunction_LIB_SRCS})

--- a/utils/windowfunction/wf_udaf.cpp
+++ b/utils/windowfunction/wf_udaf.cpp
@ -0,0 +1,508 @@
+/************************************************************************************
+  Copyright (C) 2017 MariaDB Corporation AB
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Library General Public
+  License as published by the Free Software Foundation; either
+  version 2 of the License, or (at your option) any later version.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Library General Public License for more details.
+
+  You should have received a copy of the GNU Library General Public
+  License along with this library; if not see <http://www.gnu.org/licenses>
+  or write to the Free Software Foundation, Inc.,
+  51 Franklin St., Fifth Floor, Boston, MA 02110, USA
+ *************************************************************************************/
+
+
+//#define NDEBUG
+#include <cassert>
+#include <cmath>
+#include <sstream>
+#include <iomanip>
+using namespace std;
+
+#include <boost/shared_ptr.hpp>
+using namespace boost;
+
+#include "loggingid.h"
+#include "errorcodes.h"
+#include "idberrorinfo.h"
+using namespace logging;
+
+#include "rowgroup.h"
+using namespace rowgroup;
+
+#include "idborderby.h"
+using namespace ordering;
+
+#include "joblisttypes.h"
+#include "calpontsystemcatalog.h"
+#include "constantcolumn.h"
+using namespace execplan;
+
+#include "windowfunctionstep.h"
+using namespace joblist;
+
+#include "wf_udaf.h"
+
+
+namespace windowfunction
+{
+namespace
+{
+// Creates a WF_udaf<U>, hands ownership to a shared_ptr and completes the
+// set-up while the concrete type is still known: the UDAF context held by
+// the new object is given the object's interrupted flag, and resetData()
+// is called once.
+template<typename U>
+boost::shared_ptr<WindowFunctionType> makeTypedUdaf(int id, const string& name,
+													mcsv1sdk::mcsv1Context& context)
+{
+	WF_udaf<U>* typed = new WF_udaf<U>(id, name, context);
+	boost::shared_ptr<WindowFunctionType> func(typed);
+	typed->getContext().setInterrupted(typed->getInterruptedPtr());
+	typed->resetData();
+	return func;
+}
+} // anonymous namespace
+
+// Factory for UDAnF window function objects.
+//
+// The template parameter of the class this is called through is irrelevant;
+// the element type of the object that gets created is chosen from ct:
+//   signed integers / DECIMAL     -> WF_udaf<int64_t>
+//   unsigned integers / UDECIMAL  -> WF_udaf<uint64_t>
+//   DOUBLE / UDOUBLE              -> WF_udaf<double>
+//   FLOAT / UFLOAT                -> WF_udaf<float>
+//   CHAR/VARCHAR/VARBINARY/TEXT/BLOB -> WF_udaf<string>
+//
+// @param id       window function id stored in the new object
+// @param name     function name, also used in the error message
+// @param ct       column data type of the function's input parameter
+// @param context  UDAF context handed to the new object's constructor
+// @return the new function object
+// @throws IDBExcept(ERR_WF_INVALID_PARM_TYPE) for any other column type
+template<typename T>
+boost::shared_ptr<WindowFunctionType> WF_udaf<T>::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context)
+{
+	// The per-object set-up is done inside makeTypedUdaf so that it always
+	// operates on the type that was really constructed, instead of casting
+	// the result to WF_udaf<T>, which is only correct when T happens to match.
+	switch (ct)
+	{
+		case CalpontSystemCatalog::TINYINT:
+		case CalpontSystemCatalog::SMALLINT:
+		case CalpontSystemCatalog::MEDINT:
+		case CalpontSystemCatalog::INT:
+		case CalpontSystemCatalog::BIGINT:
+		case CalpontSystemCatalog::DECIMAL:
+			return makeTypedUdaf<int64_t>(id, name, context);
+
+		case CalpontSystemCatalog::UTINYINT:
+		case CalpontSystemCatalog::USMALLINT:
+		case CalpontSystemCatalog::UMEDINT:
+		case CalpontSystemCatalog::UINT:
+		case CalpontSystemCatalog::UBIGINT:
+		case CalpontSystemCatalog::UDECIMAL:
+			return makeTypedUdaf<uint64_t>(id, name, context);
+
+		case CalpontSystemCatalog::DOUBLE:
+		case CalpontSystemCatalog::UDOUBLE:
+			return makeTypedUdaf<double>(id, name, context);
+
+		case CalpontSystemCatalog::FLOAT:
+		case CalpontSystemCatalog::UFLOAT:
+			return makeTypedUdaf<float>(id, name, context);
+
+		case CalpontSystemCatalog::CHAR:
+		case CalpontSystemCatalog::VARCHAR:
+		case CalpontSystemCatalog::VARBINARY:
+		case CalpontSystemCatalog::TEXT:
+		case CalpontSystemCatalog::BLOB:
+			return makeTypedUdaf<string>(id, name, context);
+
+		default:
+			break;
+	}
+
+	// Unsupported parameter type: log it and report it to the caller.
+	string errStr = name + "(" + colType2String[ct] + ")";
+	errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr);
+	cerr << errStr << endl;
+	throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE);
+}
+
+// Copy constructor (takes a non-const reference because it reads rhs through
+// its non-const accessors); used by clone().
+//
+// The WindowFunctionType base is copied from rhs as well, so the copy does
+// not start from a default-constructed base. Every flag member is
+// initialised: bRespectNulls and bHasDropValue used to be left
+// indeterminate in the copy. fSet and fValOut start out empty.
+// The copied context is then pointed at this object's own interrupted flag.
+template<typename T>
+WF_udaf<T>::WF_udaf(WF_udaf& rhs) : WindowFunctionType(rhs),
+	fUDAFContext(rhs.getContext()),
+	bInterrupted(rhs.getInterrupted()),
+	fDistinct(rhs.getDistinct()),
+	bRespectNulls(rhs.bRespectNulls),
+	bHasDropValue(rhs.bHasDropValue)
+{
+	getContext().setInterrupted(getInterruptedPtr());
+}
+
+// Returns a raw pointer to a newly allocated copy of this object.
+// The copy constructor takes a non-const reference, which is why constness
+// has to be cast away here.
+template<typename T>
+WindowFunctionType* WF_udaf<T>::clone() const
+{
+	WF_udaf* self = const_cast<WF_udaf*>(this);
+	WindowFunctionType* copy = new WF_udaf(*self);
+	return copy;
+}
+
+// Puts the function back into its initial state: calls the UDAnF's reset()
+// with this object's context, empties the set of distinct values and then
+// runs the base class reset.
+template<typename T>
+void WF_udaf<T>::resetData()
+{
+	mcsv1sdk::mcsv1Context& ctx = getContext();
+	ctx.getFunction()->reset(&ctx);
+
+	fSet.clear();
+
+	WindowFunctionType::resetData();
+}
+
+// Reads the "respect nulls | ignore nulls" option of the function.
+//
+// parms[1] must be a ConstantColumn (asserted); an integer value greater
+// than zero means rows with a NULL input are passed on to the UDAnF, any
+// other value means they are skipped.
+// If fewer than two parameters are supplied the default (respect nulls)
+// is kept instead of reading past the end of the vector.
+template<typename T>
+void WF_udaf<T>::parseParms(const std::vector<execplan::SRCP>& parms)
+{
+	bRespectNulls = true;
+
+	if (parms.size() < 2)
+		return;
+
+	// parms[1]: respect null | ignore null
+	ConstantColumn* cc = dynamic_cast<ConstantColumn*>(parms[1].get());
+	idbassert(cc != NULL);
+	bool isNull = false;  // dummy, hard coded
+	bRespectNulls = (cc->getIntVal(fRow, isNull) > 0);
+}
+
+// Removes the rows [b, e) from the running aggregate by calling the UDAnF's
+// dropValue() once per row.
+//
+// @param b  first row to drop
+// @param e  one past the last row to drop
+// @return true if the rows were handed to dropValue(); false if the caller
+//         has to reset the function and feed it the whole frame again.
+//         false is returned when the UDAnF reports NOT_IMPLEMENTED (this is
+//         remembered in bHasDropValue) and when DISTINCT is active: fSet
+//         records only which values were seen, not how often, so a value
+//         cannot be removed from it incrementally.
+// @throws IDBExcept(ERR_WF_UDANF_ERROR) if dropValue() returns ERROR
+template<typename T>
+bool WF_udaf<T>::dropValues(int64_t b, int64_t e)
+{
+	if (!bHasDropValue || fDistinct)
+	{
+		// Save work if we discovered dropValue is not implemented in the
+		// UDAnF, or if it cannot be used because of DISTINCT.
+		return false;
+	}
+
+	mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
+	mcsv1sdk::mcsv1Context& ctx = getContext();
+	uint64_t colIn = fFieldIndex[1];
+
+	// The datum describes the input parameter, so all of its attributes
+	// come from the input column.
+	mcsv1sdk::ColumnDatum datum;
+	datum.dataType = fRow.getColType(colIn);
+	datum.scale = fRow.getScale(colIn);
+	datum.precision = fRow.getPrecision(colIn);
+
+	// Reused for every row instead of being rebuilt per iteration.
+	std::vector<uint32_t> flags;
+	std::vector<mcsv1sdk::ColumnDatum> valsIn;
+
+	for (int64_t i = b; i < e; i++)
+	{
+		// Poll for cancellation only now and then.
+		if (i % 1000 == 0 && fStep->cancelled())
+			break;
+
+		fRow.setData(getPointer(fRowData->at(i)));
+
+		// Turn on NULL flags
+		uint32_t flag = 0;
+		if (fRow.isNullValue(colIn) == true)
+		{
+			if (!bRespectNulls)
+			{
+				continue;
+			}
+			flag |= mcsv1sdk::PARAM_IS_NULL;
+		}
+		flags.clear();
+		flags.push_back(flag);
+		ctx.setDataFlags(&flags);
+
+		T valIn;
+		getValue(colIn, valIn, &datum.dataType);
+		datum.columnData = valIn;
+
+		valsIn.clear();
+		valsIn.push_back(datum);
+
+		rc = ctx.getFunction()->dropValue(&ctx, valsIn);
+		if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED)
+		{
+			bHasDropValue = false;
+			return false;
+		}
+		if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
+		{
+			bInterrupted = true;
+			string errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_UDANF_ERROR, ctx.getErrorMessage());
+			cerr << errStr << endl;
+			throw IDBExcept(errStr, ERR_WF_UDANF_ERROR);
+		}
+	}
+
+	return true;
+}
+
+// Sets the value from valOut into column colOut, performing any conversions.
+//
+// @param valOut  value produced by the UDAnF; an empty value is stored as NULL
+// @param colOut  index of the output column, used to look up its data type
+// @param b, e, c passed through to setValue()
+// @throws runtime_error if the output column has a type with no conversion
+//
+// Whatever C++ type valOut holds, it is converted to every representation
+// (signed, unsigned, float, double and string); the one matching the output
+// column's type is then stored.
+template<typename T>
+void WF_udaf<T>::SetUDAFValue(static_any::any& valOut, int64_t colOut,
+							  int64_t b, int64_t e, int64_t c)
+{
+	// One sample value per supported type, used only for type comparison.
+	static const static_any::any& charTypeId = (char)1;
+	static const static_any::any& scharTypeId = (signed char)1;
+	static const static_any::any& shortTypeId = (short)1;
+	static const static_any::any& intTypeId = (int)1;
+	static const static_any::any& longTypeId = (long)1;
+	static const static_any::any& llTypeId = (long long)1;
+	static const static_any::any& ucharTypeId = (unsigned char)1;
+	static const static_any::any& ushortTypeId = (unsigned short)1;
+	static const static_any::any& uintTypeId = (unsigned int)1;
+	static const static_any::any& ulongTypeId = (unsigned long)1;
+	static const static_any::any& ullTypeId = (unsigned long long)1;
+	static const static_any::any& floatTypeId = (float)1;
+	static const static_any::any& doubleTypeId = (double)1;
+	static const std::string typeStr("");
+	static const static_any::any& strTypeId = typeStr;
+
+	CDT colDataType = fRow.getColType(colOut);
+	if (valOut.empty())
+	{
+		// If valOut is empty, we return NULL
+		T* pv = NULL;
+		setValue(colDataType, b, e, c, pv);
+		fPrev = c;
+		return;
+	}
+
+	// This may seem a bit convoluted. Users shouldn't return a type
+	// that they didn't set in mcsv1_UDAF::init(), but this
+	// handles whatever return type is given and casts
+	// it to whatever they said to return.
+	int64_t intOut = 0;
+	uint64_t uintOut = 0;
+	float floatOut = 0.0;
+	double doubleOut = 0.0;
+	ostringstream oss;
+	std::string strOut;
+
+	// Signed integer results. doubleOut is filled in too, so that a DOUBLE
+	// output column does not end up with 0 for an integer result.
+	if (valOut.compatible(charTypeId))
+	{
+		uintOut = intOut = valOut.cast<char>();
+		floatOut = intOut;
+		doubleOut = intOut;
+		oss << intOut;
+	}
+	else if (valOut.compatible(scharTypeId))
+	{
+		uintOut = intOut = valOut.cast<signed char>();
+		floatOut = intOut;
+		doubleOut = intOut;
+		oss << intOut;
+	}
+	else if (valOut.compatible(shortTypeId))
+	{
+		uintOut = intOut = valOut.cast<short>();
+		floatOut = intOut;
+		doubleOut = intOut;
+		oss << intOut;
+	}
+	else if (valOut.compatible(intTypeId))
+	{
+		uintOut = intOut = valOut.cast<int>();
+		floatOut = intOut;
+		doubleOut = intOut;
+		oss << intOut;
+	}
+	else if (valOut.compatible(longTypeId))
+	{
+		uintOut = intOut = valOut.cast<long>();
+		floatOut = intOut;
+		doubleOut = intOut;
+		oss << intOut;
+	}
+	else if (valOut.compatible(llTypeId))
+	{
+		uintOut = intOut = valOut.cast<long long>();
+		floatOut = intOut;
+		doubleOut = intOut;
+		oss << intOut;
+	}
+	// Unsigned integer results.
+	else if (valOut.compatible(ucharTypeId))
+	{
+		intOut = uintOut = valOut.cast<unsigned char>();
+		floatOut = uintOut;
+		doubleOut = uintOut;
+		oss << uintOut;
+	}
+	else if (valOut.compatible(ushortTypeId))
+	{
+		intOut = uintOut = valOut.cast<unsigned short>();
+		floatOut = uintOut;
+		doubleOut = uintOut;
+		oss << uintOut;
+	}
+	else if (valOut.compatible(uintTypeId))
+	{
+		intOut = uintOut = valOut.cast<unsigned int>();
+		floatOut = uintOut;
+		doubleOut = uintOut;
+		oss << uintOut;
+	}
+	else if (valOut.compatible(ulongTypeId))
+	{
+		intOut = uintOut = valOut.cast<unsigned long>();
+		floatOut = uintOut;
+		doubleOut = uintOut;
+		oss << uintOut;
+	}
+	else if (valOut.compatible(ullTypeId))
+	{
+		intOut = uintOut = valOut.cast<unsigned long long>();
+		floatOut = uintOut;
+		doubleOut = uintOut;
+		oss << uintOut;
+	}
+	// Floating point results. The signed and unsigned values are converted
+	// separately so that a negative value is not routed through uint64_t
+	// on its way to intOut.
+	else if (valOut.compatible(floatTypeId))
+	{
+		floatOut = valOut.cast<float>();
+		doubleOut = floatOut;
+		uintOut = (uint64_t)floatOut;
+		intOut = (int64_t)floatOut;
+		oss << floatOut;
+	}
+	else if (valOut.compatible(doubleTypeId))
+	{
+		doubleOut = valOut.cast<double>();
+		floatOut = (float)doubleOut;
+		uintOut = (uint64_t)doubleOut;
+		intOut = (int64_t)doubleOut;
+		oss << doubleOut;
+	}
+
+	if (valOut.compatible(strTypeId))
+	{
+		// Assign to the function-level strOut; a local declared here would
+		// hide it and a string output column would receive an empty string.
+		strOut = valOut.cast<std::string>();
+		// Convert the string to numeric type, just in case.
+		intOut = atol(strOut.c_str());
+		uintOut = strtoul(strOut.c_str(), NULL, 10);
+		doubleOut = strtod(strOut.c_str(), NULL);
+		floatOut = (float)doubleOut;
+	}
+	else
+	{
+		strOut = oss.str();
+	}
+
+	switch (colDataType)
+	{
+		case execplan::CalpontSystemCatalog::BIT:
+		case execplan::CalpontSystemCatalog::TINYINT:
+		case execplan::CalpontSystemCatalog::SMALLINT:
+		case execplan::CalpontSystemCatalog::MEDINT:
+		case execplan::CalpontSystemCatalog::INT:
+		case execplan::CalpontSystemCatalog::BIGINT:
+		case execplan::CalpontSystemCatalog::DECIMAL:
+		case execplan::CalpontSystemCatalog::UDECIMAL:
+			setValue(colDataType, b, e, c, &intOut);
+			break;
+		case execplan::CalpontSystemCatalog::UTINYINT:
+		case execplan::CalpontSystemCatalog::USMALLINT:
+		case execplan::CalpontSystemCatalog::UMEDINT:
+		case execplan::CalpontSystemCatalog::UINT:
+		case execplan::CalpontSystemCatalog::UBIGINT:
+		case execplan::CalpontSystemCatalog::DATE:
+		case execplan::CalpontSystemCatalog::DATETIME:
+			setValue(colDataType, b, e, c, &uintOut);
+			break;
+		case execplan::CalpontSystemCatalog::FLOAT:
+		case execplan::CalpontSystemCatalog::UFLOAT:
+			setValue(colDataType, b, e, c, &floatOut);
+			break;
+		case execplan::CalpontSystemCatalog::DOUBLE:
+		case execplan::CalpontSystemCatalog::UDOUBLE:
+			setValue(colDataType, b, e, c, &doubleOut);
+			break;
+		case execplan::CalpontSystemCatalog::CHAR:
+		case execplan::CalpontSystemCatalog::VARCHAR:
+		case execplan::CalpontSystemCatalog::TEXT:
+		case execplan::CalpontSystemCatalog::VARBINARY:
+		case execplan::CalpontSystemCatalog::CLOB:
+		case execplan::CalpontSystemCatalog::BLOB:
+			setValue(colDataType, b, e, c, &strOut);
+			break;
+		default:
+		{
+			std::ostringstream errmsg;
+			errmsg << "WF_udaf: No logic for data type: " << colDataType;
+			cerr << errmsg.str() << endl;
+			throw runtime_error(errmsg.str().c_str());
+		}
+	}
+}
+
+// Computes the function value for row c over the frame [b, e] and stores it.
+//
+// @param b  first row of the frame
+// @param e  last row of the frame (inclusive)
+// @param c  current row
+// @throws IDBExcept(ERR_WF_UDANF_ERROR) if nextValue() or evaluate()
+//         returns ERROR
+//
+// The frame is fed to the UDAnF and evaluated unless the frame unit is not
+// ROWS and row c is a peer of the previously processed row; in that case the
+// value kept in fValOut from the previous call is stored again.
+template<typename T>
+void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
+{
+	mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
+	mcsv1sdk::mcsv1Context& ctx = getContext();
+	uint64_t colOut = fFieldIndex[0];
+
+	// Declared at function scope: the context keeps a pointer to flags, so
+	// the vector has to be alive while evaluate() runs after the row loop.
+	// Both vectors are reused for every row.
+	std::vector<uint32_t> flags;
+	std::vector<mcsv1sdk::ColumnDatum> valsIn;
+
+	if ((fFrameUnit == WF__FRAME_ROWS) ||
+		(fPrev == -1) ||
+		(!fPeer->operator()(getPointer(fRowData->at(c)), getPointer(fRowData->at(fPrev)))))
+	{
+		// for unbounded - current row special handling
+		if (fPrev >= b && fPrev < c)
+			b = c;
+		else if (fPrev <= e && fPrev > c)
+			e = c;
+
+		uint64_t colIn = fFieldIndex[1];
+
+		// The datum describes the input parameter, so all of its attributes
+		// come from the input column.
+		mcsv1sdk::ColumnDatum datum;
+		datum.dataType = fRow.getColType(colIn);
+		datum.scale = fRow.getScale(colIn);
+		datum.precision = fRow.getPrecision(colIn);
+
+		// Tell the UDAnF whether the current row lies inside the frame.
+		if (b <= c && c <= e)
+			ctx.setContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW);
+		else
+			ctx.clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW);
+
+		for (int64_t i = b; i <= e; i++)
+		{
+			// Poll for cancellation only now and then.
+			if (i % 1000 == 0 && fStep->cancelled())
+				break;
+
+			fRow.setData(getPointer(fRowData->at(i)));
+
+			// Turn on NULL flags
+			uint32_t flag = 0;
+			if (fRow.isNullValue(colIn) == true)
+			{
+				if (!bRespectNulls)
+				{
+					continue;
+				}
+				flag |= mcsv1sdk::PARAM_IS_NULL;
+			}
+			flags.clear();
+			flags.push_back(flag);
+			ctx.setDataFlags(&flags);
+
+			T valIn;
+			getValue(colIn, valIn, &datum.dataType);
+
+			// Check for distinct, if turned on: a value that is already in
+			// the set is skipped, a new one is recorded and passed on.
+			if (fDistinct && !fSet.insert(valIn).second)
+			{
+				continue;
+			}
+
+			datum.columnData = valIn;
+
+			valsIn.clear();
+			valsIn.push_back(datum);
+
+			rc = ctx.getFunction()->nextValue(&ctx, valsIn);
+			if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
+			{
+				bInterrupted = true;
+				string errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_UDANF_ERROR, ctx.getErrorMessage());
+				cerr << errStr << endl;
+				throw IDBExcept(errStr, ERR_WF_UDANF_ERROR);
+			}
+		}
+
+		rc = ctx.getFunction()->evaluate(&ctx, fValOut);
+		if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
+		{
+			bInterrupted = true;
+			string errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_UDANF_ERROR, ctx.getErrorMessage());
+			cerr << errStr << endl;
+			throw IDBExcept(errStr, ERR_WF_UDANF_ERROR);
+		}
+	}
+
+	SetUDAFValue(fValOut, colOut, b, e, c);
+
+	fPrev = c;
+}
+
+template  // Explicit instantiation: windowfunctiontype.cpp calls the factory as WF_udaf<int64_t>::makeFunction.
+boost::shared_ptr<WindowFunctionType> WF_udaf<int64_t>::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context);
+
+}   //namespace
+// vim:ts=4 sw=4:
+
--- a/utils/windowfunction/wf_udaf.h
+++ b/utils/windowfunction/wf_udaf.h
@ -0,0 +1,77 @@
+/************************************************************************************
+  Copyright (C) 2017 MariaDB Corporation AB
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Library General Public
+  License as published by the Free Software Foundation; either
+  version 2 of the License, or (at your option) any later version.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Library General Public License for more details.
+
+  You should have received a copy of the GNU Library General Public
+  License along with this library; if not see <http://www.gnu.org/licenses>
+  or write to the Free Software Foundation, Inc.,
+  51 Franklin St., Fifth Floor, Boston, MA 02110, USA
+ *************************************************************************************/
+
+
+#ifndef UTILS_WF_UDAF_H
+#define UTILS_WF_UDAF_H
+
+#include <set>
+#include "windowfunctiontype.h"
+#include "mcsv1_udaf.h"
+
+
+namespace windowfunction
+{
+
+// A class to control the execution of User Defined Analytic Functions (UDAnF)
+// as defined by a specialization of mcsv1sdk::mcsv1_UDAF.
+// The template parameter is currently only used to support DISTINCT,
+// as that is done via a set<T>.
+template<typename T>
+class WF_udaf : public WindowFunctionType
+{
+public:
+	// Stores a copy of the given UDAF context. All flags get a defined
+	// initial value: not interrupted, not distinct, respect nulls, and
+	// dropValue assumed to be available until the UDAnF says otherwise.
+	WF_udaf(int id, const std::string& name, mcsv1sdk::mcsv1Context& context) :
+		WindowFunctionType(id, name), fUDAFContext(context), bInterrupted(false),
+		fDistinct(false), bRespectNulls(true), bHasDropValue(true) {}
+	WF_udaf(WF_udaf& rhs);
+	// pure virtual in base
+	void operator()(int64_t b, int64_t e, int64_t c);
+	WindowFunctionType* clone() const;
+	void resetData();
+	void parseParms(const std::vector<execplan::SRCP>&);
+	// Returns false when the UDAnF has no dropValue() implementation.
+	virtual bool dropValues(int64_t, int64_t);
+
+	mcsv1sdk::mcsv1Context& getContext() {return fUDAFContext;}
+	bool getInterrupted() {return bInterrupted;}
+	// Address of the interrupted flag. This has to be a pointer: returning
+	// bool would turn the address into the constant true.
+	bool* getInterruptedPtr() {return &bInterrupted;}
+	bool getDistinct() {return fDistinct;}
+
+protected:
+	// Converts valOut to the type of column colOut and stores it.
+	void SetUDAFValue(static_any::any& valOut, int64_t colOut, int64_t b, int64_t e, int64_t c);
+
+	mcsv1sdk::mcsv1Context fUDAFContext;  // The UDAF context
+	bool bInterrupted;                    // Shared by all the threads
+	bool fDistinct;
+	bool bRespectNulls;                   // respect null | ignore null
+	bool bHasDropValue;                   // Set to false when we discover the UDAnF doesn't implement dropValue.
+	std::set<T> fSet;                     // To hold distinct values
+	static_any::any fValOut;              // The return value
+
+public:
+	// Factory: creates the WF_udaf specialization that matches column type ct.
+	static boost::shared_ptr<WindowFunctionType> makeFunction(int id, const std::string& name,
+															  int ct, mcsv1sdk::mcsv1Context& context);
+};
+
+
+} // namespace
+
+#endif  // UTILS_WF_UDAF_H
+
+// vim:ts=4 sw=4:
+
--- a/utils/windowfunction/windowfunction.cpp
+++ b/utils/windowfunction/windowfunction.cpp
@ -163,11 +163,35 @@ void WindowFunction::operator()()
 			}
 			else
 			{
+				pair<int64_t, int64_t> w;
+				pair<int64_t, int64_t> prevFrame;
+				int64_t b, e;
+				bool firstTime = true;
 				for (int64_t i = begin; i <= end && !fStep->cancelled(); i++)
 				{
-					pair<int64_t, int64_t> w = fFrame->getWindow(begin, end, i);
-					fFunctionType->resetData();
-					fFunctionType->operator()(w.first, w.second, i);
+					w = fFrame->getWindow(begin, end, i);
+					b = w.first;
+					e = w.second;
+					if (firstTime)
+					{
+						prevFrame = w;
+					}
+					// UDAnF functions may have a dropValue function implemented.
+					// If they do, we can optimize by calling dropValue() for those
+					// values leaving the window and nextValue for those entering, rather
+					// than a resetData() and then iterating over the entire window.
+					// Built-in functions may have this functionality added in the future.
+					if (fFunctionType->dropValues(prevFrame.first, w.first))
+					{
+						b = firstTime ? w.first : prevFrame.second+1;
+					}
+					else
+					{
+						fFunctionType->resetData();
+					}
+					fFunctionType->operator()(b, e, i);
+					prevFrame = w;
+					firstTime = false;
 				}
 			}
 		}
--- a/utils/windowfunction/windowfunctiontype.cpp
+++ b/utils/windowfunction/windowfunctiontype.cpp
@ -58,6 +58,7 @@ using namespace joblist;
 #include "wf_row_number.h"
 #include "wf_stats.h"
 #include "wf_sum_avg.h"
+#include "wf_udaf.h"

 namespace windowfunction
 {
@ -137,13 +138,16 @@ map<string, int> WindowFunctionType::windowFunctionId = assign::map_list_of
 	(string("REGR_SXX"),        WF__REGR_SXX)
 	(string("REGR_SXY"),        WF__REGR_SXY)
 	(string("REGR_SYY"),        WF__REGR_SYY)
+	(string("UDAF_FUNC"),       WF__UDAF)
 ;

 boost::shared_ptr<WindowFunctionType>
-	WindowFunctionType::makeWindowFunction(const string& name, int ct)
+	WindowFunctionType::makeWindowFunction(const string& name, int ct, WindowFunctionColumn* wc)
 {
 	boost::shared_ptr<WindowFunctionType> af;
 	int functionId = windowFunctionId[algorithm::to_upper_copy(name)];
+	// The template parameters here are dummies to execute the static makeFunction
+	// which sets the real type based on ct.
 	switch (functionId)
 	{
 		case WF__COUNT_ASTERISK:
@ -192,6 +196,9 @@ boost::shared_ptr<WindowFunctionType>
 		case WF__PERCENTILE_DISC:
 			af = WF_percentile<int64_t>::makeFunction(functionId, name, ct);
 			break;
+		case WF__UDAF:
+			af = WF_udaf<int64_t>::makeFunction(functionId, name, ct, wc->getUDAFContext());
+			break;
 		case WF__REGR_SLOPE:
 		case WF__REGR_INTERCEPT:
 		case WF__REGR_COUNT:
@ -211,7 +218,6 @@ boost::shared_ptr<WindowFunctionType>
 	return af;
 }

-
 const string WindowFunctionType::toString() const
 {
 	ostringstream oss;
@ -223,77 +229,81 @@ const string WindowFunctionType::toString() const
 	return oss.str();
 }

-
-template<typename T> void WindowFunctionType::getValue(uint64_t i, T& t)
+template<typename T> void WindowFunctionType::getValue(uint64_t i, T& t, CDT* cdt)
 {
 }

-
-template<> void WindowFunctionType::getValue<int64_t>(uint64_t i, int64_t& t)
+template<> void WindowFunctionType::getValue<int64_t>(uint64_t i, int64_t& t, CDT* cdt)
 {
 	t = fRow.getIntField(i);
+	if (cdt)
+	{
+		*cdt = execplan::CalpontSystemCatalog::BIGINT;
+	}
 }

-
-template<> void WindowFunctionType::getValue<uint64_t>(uint64_t i, uint64_t& t)
+template<> void WindowFunctionType::getValue<uint64_t>(uint64_t i, uint64_t& t, CDT* cdt)
 {
 	t = fRow.getUintField(i);
+	if (cdt)
+	{
+		*cdt = execplan::CalpontSystemCatalog::UBIGINT;
+	}
 }

-
-template<> void WindowFunctionType::getValue<double>(uint64_t i, double& t)
+template<> void WindowFunctionType::getValue<double>(uint64_t i, double& t, CDT* cdt)
 {
 	t = fRow.getDoubleField(i);
+	if (cdt)
+	{
+		*cdt = execplan::CalpontSystemCatalog::DOUBLE;
+	}
 }

-
-template<> void WindowFunctionType::getValue<float>(uint64_t i, float& t)
+template<> void WindowFunctionType::getValue<float>(uint64_t i, float& t, CDT* cdt)
 {
 	t = fRow.getFloatField(i);
+	if (cdt)
+	{
+		*cdt = execplan::CalpontSystemCatalog::FLOAT;
+	}
 }

-
-template<> void WindowFunctionType::getValue<string>(uint64_t i, string& t)
+template<> void WindowFunctionType::getValue<string>(uint64_t i, string& t, CDT* cdt)
 {
 	t = fRow.getStringField(i);
+	// By not setting cdt, we let it default to the column's type 
 }

-
 template<typename T> void WindowFunctionType::setValue(uint64_t i, T& t)
 {
 }

-
 template<> void WindowFunctionType::setValue<int64_t>(uint64_t i, int64_t& t)
 {
 	fRow.setIntField(t, i);
 }

-
 template<> void WindowFunctionType::setValue<uint64_t>(uint64_t i, uint64_t& t)
 {
 	fRow.setUintField(t, i);
 }

-
 template<> void WindowFunctionType::setValue<double>(uint64_t i, double& t)
 {
 	fRow.setDoubleField(t, i);
 }

-
 template<> void WindowFunctionType::setValue<float>(uint64_t i, float& t)
 {
 	fRow.setFloatField(t, i);
 }

-
 template<> void WindowFunctionType::setValue<string>(uint64_t i, string& t)
 {
 	fRow.setStringField(t, i);
 }

-
 template<typename T>
 void WindowFunctionType::setValue(int ct, int64_t b, int64_t e, int64_t c, T* v)
 {
@ -314,7 +324,6 @@ void WindowFunctionType::setValue(int ct, int64_t b, int64_t e, int64_t c, T* v)
 	}
 }

-
 template<typename T>
 void WindowFunctionType::implicit2T(uint64_t i, T& t, int s)
 {
@ -384,55 +393,47 @@ void WindowFunctionType::implicit2T(uint64_t i, T& t, int s)
 	}
 }

-
 template<>
 void WindowFunctionType::implicit2T<string>(uint64_t i, string& t, int)
 {
 	t = fRow.getStringField(i);
 }

-
 template<typename T>
 void WindowFunctionType::getConstValue(ConstantColumn* cc, T& t, bool& b)
 {
 }

-
 template<>
 void WindowFunctionType::getConstValue<int64_t>(ConstantColumn* cc, int64_t& t, bool& b)
 {
 	t = cc->getIntVal(fRow, b);
 }

-
 template<>
 void WindowFunctionType::getConstValue<uint64_t>(ConstantColumn* cc, uint64_t& t, bool& b)
 {
 	t = cc->getUintVal(fRow, b);
 }

-
 template<>
 void WindowFunctionType::getConstValue<double>(ConstantColumn* cc, double& t, bool& b)
 {
 	t = cc->getDoubleVal(fRow, b);
 }

-
 template<>
 void WindowFunctionType::getConstValue<float>(ConstantColumn* cc, float& t, bool& b)
 {
 	t = cc->getFloatVal(fRow, b);
 }

-
 template<>
 void WindowFunctionType::getConstValue<string>(ConstantColumn* cc, string& t, bool& b)
 {
 	t = cc->getStrVal(fRow, b);
 }

-
 template void WindowFunctionType::implicit2T<int64_t>(uint64_t, int64_t&, int);
 template void WindowFunctionType::implicit2T<uint64_t>(uint64_t, uint64_t&, int);
 template void WindowFunctionType::implicit2T<float>(uint64_t, float&, int);
@ -445,7 +446,6 @@ template void WindowFunctionType::setValue<double>(int, int64_t, int64_t, int64_
 template void WindowFunctionType::setValue<string>(int, int64_t, int64_t, int64_t, string*);


-
 void* WindowFunctionType::getNullValueByType(int ct, int pos)
 {
 	static uint64_t bigIntNull    = joblist::BIGINTNULL;
@ -566,7 +566,6 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos)
 	return v;
 }

-
 }   //namespace
 // vim:ts=4 sw=4:

--- a/utils/windowfunction/windowfunctiontype.h
+++ b/utils/windowfunction/windowfunctiontype.h
@ -98,8 +98,9 @@ const int WF__REGR_AVGY       = 32;
 const int WF__REGR_SXX        = 33;
 const int WF__REGR_SXY        = 34;
 const int WF__REGR_SYY        = 35;
+const int WF__UDAF            = 36;

-
+typedef execplan::CalpontSystemCatalog::ColDataType CDT;

 /** @brief class WindowFunction
 *
@ -129,6 +130,10 @@ public:
 	// @brief virtual parseParms()
 	virtual void parseParms(const std::vector<execplan::SRCP>&) {}

+	// @brief virtual dropValues() For UDAnF functions
+	// return false if there's no dropValue() implemented in the function.
+	virtual bool dropValues(int64_t, int64_t) {return false;}
+
 	// @brief virtual display method
 	virtual const std::string toString() const;

@ -148,14 +153,14 @@ public:
 	void peer(const boost::shared_ptr<ordering::EqualCompData>& p)  { fPeer = p; }
 	void setCallback(joblist::WindowFunctionStep* step)             { fStep = step; }

-	static boost::shared_ptr<WindowFunctionType> makeWindowFunction(const std::string&, int ct);
+	static boost::shared_ptr<WindowFunctionType> makeWindowFunction(const std::string&, int ct, WindowFunctionColumn* wc);

 protected:

 	static std::map<std::string, int> windowFunctionId;

 	// utility methods
-	template<typename T> void getValue(uint64_t, T&);
+	template<typename T> void getValue(uint64_t, T&, CDT* cdt = NULL);
 	template<typename T> void setValue(int, int64_t, int64_t, int64_t, T* = NULL);
 	template<typename T> void setValue(uint64_t, T&);
 	template<typename T> void implicit2T(uint64_t, T&, int);