You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-523 Add UDAF and UDAnF SDK
This commit is contained in:
3
utils/windowfunction/CMakeLists.txt
Normal file → Executable file
3
utils/windowfunction/CMakeLists.txt
Normal file → Executable file
@ -21,7 +21,8 @@ set(windowfunction_LIB_SRCS
|
||||
wf_ranking.cpp
|
||||
wf_row_number.cpp
|
||||
wf_stats.cpp
|
||||
wf_sum_avg.cpp)
|
||||
wf_sum_avg.cpp
|
||||
wf_udaf.cpp)
|
||||
|
||||
add_library(windowfunction SHARED ${windowfunction_LIB_SRCS})
|
||||
|
||||
|
508
utils/windowfunction/wf_udaf.cpp
Executable file
508
utils/windowfunction/wf_udaf.cpp
Executable file
@ -0,0 +1,508 @@
|
||||
/************************************************************************************
|
||||
Copyright (C) 2017 MariaDB Corporation AB
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with this library; if not see <http://www.gnu.org/licenses>
|
||||
or write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St., Fifth Floor, Boston, MA 02110, USA
|
||||
*************************************************************************************/
|
||||
|
||||
|
||||
//#define NDEBUG
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
using namespace std;
|
||||
|
||||
#include <boost/shared_ptr.hpp>
|
||||
using namespace boost;
|
||||
|
||||
#include "loggingid.h"
|
||||
#include "errorcodes.h"
|
||||
#include "idberrorinfo.h"
|
||||
using namespace logging;
|
||||
|
||||
#include "rowgroup.h"
|
||||
using namespace rowgroup;
|
||||
|
||||
#include "idborderby.h"
|
||||
using namespace ordering;
|
||||
|
||||
#include "joblisttypes.h"
|
||||
#include "calpontsystemcatalog.h"
|
||||
#include "constantcolumn.h"
|
||||
using namespace execplan;
|
||||
|
||||
#include "windowfunctionstep.h"
|
||||
using namespace joblist;
|
||||
|
||||
#include "wf_udaf.h"
|
||||
|
||||
|
||||
namespace windowfunction
|
||||
{
|
||||
template<typename T>
|
||||
boost::shared_ptr<WindowFunctionType> WF_udaf<T>::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context)
|
||||
{
|
||||
boost::shared_ptr<WindowFunctionType> func;
|
||||
switch (ct)
|
||||
{
|
||||
case CalpontSystemCatalog::TINYINT:
|
||||
case CalpontSystemCatalog::SMALLINT:
|
||||
case CalpontSystemCatalog::MEDINT:
|
||||
case CalpontSystemCatalog::INT:
|
||||
case CalpontSystemCatalog::BIGINT:
|
||||
case CalpontSystemCatalog::DECIMAL:
|
||||
{
|
||||
func.reset(new WF_udaf<int64_t>(id, name, context));
|
||||
break;
|
||||
}
|
||||
case CalpontSystemCatalog::UTINYINT:
|
||||
case CalpontSystemCatalog::USMALLINT:
|
||||
case CalpontSystemCatalog::UMEDINT:
|
||||
case CalpontSystemCatalog::UINT:
|
||||
case CalpontSystemCatalog::UBIGINT:
|
||||
case CalpontSystemCatalog::UDECIMAL:
|
||||
{
|
||||
func.reset(new WF_udaf<uint64_t>(id, name, context));
|
||||
break;
|
||||
}
|
||||
case CalpontSystemCatalog::DOUBLE:
|
||||
case CalpontSystemCatalog::UDOUBLE:
|
||||
{
|
||||
func.reset(new WF_udaf<double>(id, name, context));
|
||||
break;
|
||||
}
|
||||
case CalpontSystemCatalog::FLOAT:
|
||||
case CalpontSystemCatalog::UFLOAT:
|
||||
{
|
||||
func.reset(new WF_udaf<float>(id, name, context));
|
||||
break;
|
||||
}
|
||||
case CalpontSystemCatalog::CHAR:
|
||||
case CalpontSystemCatalog::VARCHAR:
|
||||
case CalpontSystemCatalog::VARBINARY:
|
||||
case CalpontSystemCatalog::TEXT:
|
||||
case CalpontSystemCatalog::BLOB:
|
||||
{
|
||||
func.reset(new WF_udaf<string>(id, name, context));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
string errStr = name + "(" + colType2String[ct] + ")";
|
||||
errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr);
|
||||
cerr << errStr << endl;
|
||||
throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the UDAnF function object
|
||||
WF_udaf* wfUDAF = (WF_udaf*)func.get();
|
||||
mcsv1sdk::mcsv1Context& udafContext = wfUDAF->getContext();
|
||||
udafContext.setInterrupted(wfUDAF->getInterruptedPtr());
|
||||
wfUDAF->resetData();
|
||||
return func;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
WF_udaf<T>::WF_udaf(WF_udaf& rhs) : fUDAFContext(rhs.getContext()),
|
||||
bInterrupted(rhs.getInterrupted()),
|
||||
fDistinct(rhs.getDistinct())
|
||||
{
|
||||
getContext().setInterrupted(getInterruptedPtr());
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
WindowFunctionType* WF_udaf<T>::clone() const
|
||||
{
|
||||
return new WF_udaf(*const_cast<WF_udaf*>(this));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void WF_udaf<T>::resetData()
|
||||
{
|
||||
getContext().getFunction()->reset(&getContext());
|
||||
fSet.clear();
|
||||
WindowFunctionType::resetData();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void WF_udaf<T>::parseParms(const std::vector<execplan::SRCP>& parms)
|
||||
{
|
||||
bRespectNulls = true;
|
||||
// parms[1]: respect null | ignore null
|
||||
ConstantColumn* cc = dynamic_cast<ConstantColumn*>(parms[1].get());
|
||||
idbassert(cc != NULL);
|
||||
bool isNull = false; // dummy, harded coded
|
||||
bRespectNulls = (cc->getIntVal(fRow, isNull) > 0);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool WF_udaf<T>::dropValues(int64_t b, int64_t e)
|
||||
{
|
||||
if (!bHasDropValue)
|
||||
{
|
||||
// Save work if we discovered dropValue is not implemented in the UDAnF
|
||||
return false;
|
||||
}
|
||||
|
||||
mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
|
||||
uint64_t colOut = fFieldIndex[0];
|
||||
uint64_t colIn = fFieldIndex[1];
|
||||
|
||||
mcsv1sdk::ColumnDatum datum;
|
||||
datum.dataType = fRow.getColType(colIn);
|
||||
datum.scale = fRow.getScale(colIn);
|
||||
datum.precision = fRow.getPrecision(colOut);
|
||||
|
||||
for (int64_t i = b; i < e; i++)
|
||||
{
|
||||
if (i % 1000 == 0 && fStep->cancelled())
|
||||
break;
|
||||
|
||||
fRow.setData(getPointer(fRowData->at(i)));
|
||||
// Turn on NULL flags
|
||||
std::vector<uint32_t> flags;
|
||||
uint32_t flag = 0;
|
||||
if (fRow.isNullValue(colIn) == true)
|
||||
{
|
||||
if (!bRespectNulls)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
flag |= mcsv1sdk::PARAM_IS_NULL;
|
||||
}
|
||||
flags.push_back(flag);
|
||||
getContext().setDataFlags(&flags);
|
||||
|
||||
T valIn;
|
||||
getValue(colIn, valIn, &datum.dataType);
|
||||
|
||||
// Check for distinct, if turned on.
|
||||
// TODO: when we impliment distinct, we need to revist this.
|
||||
if ((fDistinct) || (fSet.find(valIn) != fSet.end()))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
datum.columnData = valIn;
|
||||
|
||||
std::vector<mcsv1sdk::ColumnDatum> valsIn;
|
||||
valsIn.push_back(datum);
|
||||
|
||||
rc = getContext().getFunction()->dropValue(&getContext(), valsIn);
|
||||
if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED)
|
||||
{
|
||||
bHasDropValue = false;
|
||||
return false;
|
||||
}
|
||||
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
|
||||
{
|
||||
bInterrupted = true;
|
||||
string errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_UDANF_ERROR, getContext().getErrorMessage());
|
||||
cerr << errStr << endl;
|
||||
throw IDBExcept(errStr, ERR_WF_UDANF_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Sets the value from valOut into column colOut, performing any conversions.
|
||||
template<typename T>
|
||||
void WF_udaf<T>::SetUDAFValue(static_any::any& valOut, int64_t colOut,
|
||||
int64_t b, int64_t e, int64_t c)
|
||||
{
|
||||
static const static_any::any& charTypeId = (char)1;
|
||||
static const static_any::any& scharTypeId = (signed char)1;
|
||||
static const static_any::any& shortTypeId = (short)1;
|
||||
static const static_any::any& intTypeId = (int)1;
|
||||
static const static_any::any& longTypeId = (long)1;
|
||||
static const static_any::any& llTypeId = (long long)1;
|
||||
static const static_any::any& ucharTypeId = (unsigned char)1;
|
||||
static const static_any::any& ushortTypeId = (unsigned short)1;
|
||||
static const static_any::any& uintTypeId = (unsigned int)1;
|
||||
static const static_any::any& ulongTypeId = (unsigned long)1;
|
||||
static const static_any::any& ullTypeId = (unsigned long long)1;
|
||||
static const static_any::any& floatTypeId = (float)1;
|
||||
static const static_any::any& doubleTypeId = (double)1;
|
||||
static const std::string typeStr("");
|
||||
static const static_any::any& strTypeId = typeStr;
|
||||
|
||||
CDT colDataType = fRow.getColType(colOut);
|
||||
if (valOut.empty())
|
||||
{
|
||||
// If valOut is empty, we return NULL
|
||||
T* pv = NULL;
|
||||
setValue(colDataType, b, e, c, pv);
|
||||
fPrev = c;
|
||||
return;
|
||||
}
|
||||
|
||||
// This may seem a bit convoluted. Users shouldn't return a type
|
||||
// that they didn't set in mcsv1_UDAF::init(), but this
|
||||
// handles whatever return type is given and casts
|
||||
// it to whatever they said to return.
|
||||
int64_t intOut = 0;
|
||||
uint64_t uintOut = 0;
|
||||
float floatOut = 0.0;
|
||||
double doubleOut = 0.0;
|
||||
ostringstream oss;
|
||||
std::string strOut;
|
||||
|
||||
if (valOut.compatible(charTypeId))
|
||||
{
|
||||
uintOut = intOut = valOut.cast<char>();
|
||||
floatOut = intOut;
|
||||
oss << intOut;
|
||||
}
|
||||
else if (valOut.compatible(scharTypeId))
|
||||
{
|
||||
uintOut = intOut = valOut.cast<signed char>();
|
||||
floatOut = intOut;
|
||||
oss << intOut;
|
||||
}
|
||||
else if (valOut.compatible(shortTypeId))
|
||||
{
|
||||
uintOut = intOut = valOut.cast<short>();
|
||||
floatOut = intOut;
|
||||
oss << intOut;
|
||||
}
|
||||
else if (valOut.compatible(intTypeId))
|
||||
{
|
||||
uintOut = intOut = valOut.cast<int>();
|
||||
floatOut = intOut;
|
||||
oss << intOut;
|
||||
}
|
||||
else if (valOut.compatible(longTypeId))
|
||||
{
|
||||
uintOut = intOut = valOut.cast<long>();
|
||||
floatOut = intOut;
|
||||
oss << intOut;
|
||||
}
|
||||
else if (valOut.compatible(llTypeId))
|
||||
{
|
||||
uintOut = intOut = valOut.cast<long long>();
|
||||
floatOut = intOut;
|
||||
oss << intOut;
|
||||
}
|
||||
else if (valOut.compatible(ucharTypeId))
|
||||
{
|
||||
intOut = uintOut = valOut.cast<unsigned char>();
|
||||
floatOut = uintOut;
|
||||
oss << uintOut;
|
||||
}
|
||||
else if (valOut.compatible(ushortTypeId))
|
||||
{
|
||||
intOut = uintOut = valOut.cast<unsigned short>();
|
||||
floatOut = uintOut;
|
||||
oss << uintOut;
|
||||
}
|
||||
else if (valOut.compatible(uintTypeId))
|
||||
{
|
||||
intOut = uintOut = valOut.cast<unsigned int>();
|
||||
floatOut = uintOut;
|
||||
oss << uintOut;
|
||||
}
|
||||
else if (valOut.compatible(ulongTypeId))
|
||||
{
|
||||
intOut = uintOut = valOut.cast<unsigned long>();
|
||||
floatOut = uintOut;
|
||||
oss << uintOut;
|
||||
}
|
||||
else if (valOut.compatible(ullTypeId))
|
||||
{
|
||||
intOut = uintOut = valOut.cast<unsigned long long>();
|
||||
floatOut = uintOut;
|
||||
oss << uintOut;
|
||||
}
|
||||
else if (valOut.compatible(floatTypeId))
|
||||
{
|
||||
floatOut = valOut.cast<float>();
|
||||
doubleOut = floatOut;
|
||||
intOut = uintOut = floatOut;
|
||||
oss << floatOut;
|
||||
}
|
||||
else if (valOut.compatible(doubleTypeId))
|
||||
{
|
||||
doubleOut = valOut.cast<double>();
|
||||
floatOut = (float)doubleOut;
|
||||
uintOut = (uint64_t)doubleOut;
|
||||
intOut = (int64_t)doubleOut;
|
||||
oss << doubleOut;
|
||||
}
|
||||
|
||||
if (valOut.compatible(strTypeId))
|
||||
{
|
||||
std::string strOut = valOut.cast<std::string>();
|
||||
// Convert the string to numeric type, just in case.
|
||||
intOut = atol(strOut.c_str());
|
||||
uintOut = strtoul(strOut.c_str(), NULL, 10);
|
||||
doubleOut = strtod(strOut.c_str(), NULL);
|
||||
floatOut = (float)doubleOut;
|
||||
}
|
||||
else
|
||||
{
|
||||
strOut = oss.str();
|
||||
}
|
||||
|
||||
switch (colDataType)
|
||||
{
|
||||
case execplan::CalpontSystemCatalog::BIT:
|
||||
case execplan::CalpontSystemCatalog::TINYINT:
|
||||
case execplan::CalpontSystemCatalog::SMALLINT:
|
||||
case execplan::CalpontSystemCatalog::MEDINT:
|
||||
case execplan::CalpontSystemCatalog::INT:
|
||||
case execplan::CalpontSystemCatalog::BIGINT:
|
||||
case execplan::CalpontSystemCatalog::DECIMAL:
|
||||
case execplan::CalpontSystemCatalog::UDECIMAL:
|
||||
setValue(colDataType, b, e, c, &intOut);
|
||||
break;
|
||||
case execplan::CalpontSystemCatalog::UTINYINT:
|
||||
case execplan::CalpontSystemCatalog::USMALLINT:
|
||||
case execplan::CalpontSystemCatalog::UMEDINT:
|
||||
case execplan::CalpontSystemCatalog::UINT:
|
||||
case execplan::CalpontSystemCatalog::UBIGINT:
|
||||
case execplan::CalpontSystemCatalog::DATE:
|
||||
case execplan::CalpontSystemCatalog::DATETIME:
|
||||
setValue(colDataType, b, e, c, &uintOut);
|
||||
break;
|
||||
case execplan::CalpontSystemCatalog::FLOAT:
|
||||
case execplan::CalpontSystemCatalog::UFLOAT:
|
||||
setValue(colDataType, b, e, c, &floatOut);
|
||||
break;
|
||||
case execplan::CalpontSystemCatalog::DOUBLE:
|
||||
case execplan::CalpontSystemCatalog::UDOUBLE:
|
||||
setValue(colDataType, b, e, c, &doubleOut);
|
||||
break;
|
||||
case execplan::CalpontSystemCatalog::CHAR:
|
||||
case execplan::CalpontSystemCatalog::VARCHAR:
|
||||
case execplan::CalpontSystemCatalog::TEXT:
|
||||
case execplan::CalpontSystemCatalog::VARBINARY:
|
||||
case execplan::CalpontSystemCatalog::CLOB:
|
||||
case execplan::CalpontSystemCatalog::BLOB:
|
||||
setValue(colDataType, b, e, c, &strOut);
|
||||
break;
|
||||
default:
|
||||
{
|
||||
std::ostringstream errmsg;
|
||||
errmsg << "WF_udaf: No logic for data type: " << colDataType;
|
||||
cerr << errmsg.str() << endl;
|
||||
throw runtime_error(errmsg.str().c_str());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
|
||||
{
|
||||
mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
|
||||
uint64_t colOut = fFieldIndex[0];
|
||||
static_any::any valOut;
|
||||
|
||||
if ((fFrameUnit == WF__FRAME_ROWS) ||
|
||||
(fPrev == -1) ||
|
||||
(!fPeer->operator()(getPointer(fRowData->at(c)), getPointer(fRowData->at(fPrev)))))
|
||||
{
|
||||
// for unbounded - current row special handling
|
||||
if (fPrev >= b && fPrev < c)
|
||||
b = c;
|
||||
else if (fPrev <= e && fPrev > c)
|
||||
e = c;
|
||||
|
||||
uint64_t colIn = fFieldIndex[1];
|
||||
|
||||
mcsv1sdk::ColumnDatum datum;
|
||||
datum.dataType = fRow.getColType(colIn);
|
||||
datum.scale = fRow.getScale(colIn);
|
||||
datum.precision = fRow.getPrecision(colOut);
|
||||
|
||||
if (b<=c && c<=e)
|
||||
getContext().setContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW);
|
||||
else
|
||||
getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW);
|
||||
|
||||
|
||||
for (int64_t i = b; i <= e; i++)
|
||||
{
|
||||
if (i % 1000 == 0 && fStep->cancelled())
|
||||
break;
|
||||
|
||||
fRow.setData(getPointer(fRowData->at(i)));
|
||||
// Turn on NULL flags
|
||||
std::vector<uint32_t> flags;
|
||||
uint32_t flag = 0;
|
||||
if (fRow.isNullValue(colIn) == true)
|
||||
{
|
||||
if (!bRespectNulls)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
flag |= mcsv1sdk::PARAM_IS_NULL;
|
||||
}
|
||||
flags.push_back(flag);
|
||||
getContext().setDataFlags(&flags);
|
||||
|
||||
T valIn;
|
||||
getValue(colIn, valIn, &datum.dataType);
|
||||
|
||||
// Check for distinct, if turned on.
|
||||
if ((fDistinct) || (fSet.find(valIn) != fSet.end()))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (fDistinct)
|
||||
fSet.insert(valIn);
|
||||
|
||||
datum.columnData = valIn;
|
||||
|
||||
std::vector<mcsv1sdk::ColumnDatum> valsIn;
|
||||
valsIn.push_back(datum);
|
||||
|
||||
rc = getContext().getFunction()->nextValue(&getContext(), valsIn);
|
||||
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
|
||||
{
|
||||
bInterrupted = true;
|
||||
string errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_UDANF_ERROR, getContext().getErrorMessage());
|
||||
cerr << errStr << endl;
|
||||
throw IDBExcept(errStr, ERR_WF_UDANF_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
rc = getContext().getFunction()->evaluate(&getContext(), fValOut);
|
||||
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
|
||||
{
|
||||
bInterrupted = true;
|
||||
string errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_UDANF_ERROR, getContext().getErrorMessage());
|
||||
cerr << errStr << endl;
|
||||
throw IDBExcept(errStr, ERR_WF_UDANF_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
SetUDAFValue(fValOut, colOut, b, e, c);
|
||||
|
||||
fPrev = c;
|
||||
}
|
||||
|
||||
template
|
||||
boost::shared_ptr<WindowFunctionType> WF_udaf<int64_t>::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context);
|
||||
|
||||
} //namespace
|
||||
// vim:ts=4 sw=4:
|
||||
|
77
utils/windowfunction/wf_udaf.h
Executable file
77
utils/windowfunction/wf_udaf.h
Executable file
@ -0,0 +1,77 @@
|
||||
/************************************************************************************
|
||||
Copyright (C) 2017 MariaDB Corporation AB
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with this library; if not see <http://www.gnu.org/licenses>
|
||||
or write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St., Fifth Floor, Boston, MA 02110, USA
|
||||
*************************************************************************************/
|
||||
|
||||
|
||||
#ifndef UTILS_WF_UDAF_H
|
||||
#define UTILS_WF_UDAF_H
|
||||
|
||||
#include <set>
|
||||
#include "windowfunctiontype.h"
|
||||
#include "mcsv1_udaf.h"
|
||||
|
||||
|
||||
namespace windowfunction
|
||||
{
|
||||
|
||||
// A class to control the execution of User Define Analytic Functions (UDAnF)
|
||||
// as defined by a specialization of mcsv1sdk::mcsv1_UDAF
|
||||
// The template parameter is currently only used to support DISTINCT, as
|
||||
// as that is done via a set<T>
|
||||
template<typename T>
|
||||
class WF_udaf : public WindowFunctionType
|
||||
{
|
||||
public:
|
||||
WF_udaf(int id, const std::string& name, mcsv1sdk::mcsv1Context& context) :
|
||||
WindowFunctionType(id, name), fUDAFContext(context), fDistinct(false), bHasDropValue(true) {}
|
||||
WF_udaf(WF_udaf& rhs);
|
||||
// pure virtual in base
|
||||
void operator()(int64_t b, int64_t e, int64_t c);
|
||||
WindowFunctionType* clone() const;
|
||||
void resetData();
|
||||
void parseParms(const std::vector<execplan::SRCP>&);
|
||||
virtual bool dropValues(int64_t, int64_t);
|
||||
|
||||
mcsv1sdk::mcsv1Context& getContext() {return fUDAFContext;}
|
||||
bool getInterrupted() {return bInterrupted;}
|
||||
bool getInterruptedPtr() {return &bInterrupted;}
|
||||
bool getDistinct() {return fDistinct;}
|
||||
|
||||
protected:
|
||||
void SetUDAFValue(static_any::any& valOut, int64_t colOut, int64_t b, int64_t e, int64_t c);
|
||||
|
||||
mcsv1sdk::mcsv1Context fUDAFContext; // The UDAF context
|
||||
bool bInterrupted; // Shared by all the threads
|
||||
bool fDistinct;
|
||||
bool bRespectNulls; // respect null | ignore null
|
||||
bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue.
|
||||
std::set<T> fSet; // To hold distinct values
|
||||
static_any::any fValOut; // The return value
|
||||
|
||||
public:
|
||||
static boost::shared_ptr<WindowFunctionType> makeFunction(int id, const string& name,
|
||||
int ct, mcsv1sdk::mcsv1Context& context);
|
||||
};
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // UTILS_WF_UDAF_H
|
||||
|
||||
// vim:ts=4 sw=4:
|
||||
|
30
utils/windowfunction/windowfunction.cpp
Normal file → Executable file
30
utils/windowfunction/windowfunction.cpp
Normal file → Executable file
@ -163,11 +163,35 @@ void WindowFunction::operator()()
|
||||
}
|
||||
else
|
||||
{
|
||||
pair<int64_t, int64_t> w;
|
||||
pair<int64_t, int64_t> prevFrame;
|
||||
int64_t b, e;
|
||||
bool firstTime = true;
|
||||
for (int64_t i = begin; i <= end && !fStep->cancelled(); i++)
|
||||
{
|
||||
pair<int64_t, int64_t> w = fFrame->getWindow(begin, end, i);
|
||||
fFunctionType->resetData();
|
||||
fFunctionType->operator()(w.first, w.second, i);
|
||||
w = fFrame->getWindow(begin, end, i);
|
||||
b = w.first;
|
||||
e = w.second;
|
||||
if (firstTime)
|
||||
{
|
||||
prevFrame = w;
|
||||
}
|
||||
// UDAnF functions may have a dropValue function implemented.
|
||||
// If they do, we can optimize by calling dropValue() for those
|
||||
// values leaving the window and nextValue for those entering, rather
|
||||
// than a resetData() and then iterating over the entire window.
|
||||
// Built-in functions may have this functionality added in the future.
|
||||
if (fFunctionType->dropValues(prevFrame.first, w.first))
|
||||
{
|
||||
b = firstTime ? w.first : prevFrame.second+1;
|
||||
}
|
||||
else
|
||||
{
|
||||
fFunctionType->resetData();
|
||||
}
|
||||
fFunctionType->operator()(b, e, i);
|
||||
prevFrame = w;
|
||||
firstTime = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
63
utils/windowfunction/windowfunctiontype.cpp
Normal file → Executable file
63
utils/windowfunction/windowfunctiontype.cpp
Normal file → Executable file
@ -58,6 +58,7 @@ using namespace joblist;
|
||||
#include "wf_row_number.h"
|
||||
#include "wf_stats.h"
|
||||
#include "wf_sum_avg.h"
|
||||
#include "wf_udaf.h"
|
||||
|
||||
namespace windowfunction
|
||||
{
|
||||
@ -137,13 +138,16 @@ map<string, int> WindowFunctionType::windowFunctionId = assign::map_list_of
|
||||
(string("REGR_SXX"), WF__REGR_SXX)
|
||||
(string("REGR_SXY"), WF__REGR_SXY)
|
||||
(string("REGR_SYY"), WF__REGR_SYY)
|
||||
(string("UDAF_FUNC"), WF__UDAF)
|
||||
;
|
||||
|
||||
boost::shared_ptr<WindowFunctionType>
|
||||
WindowFunctionType::makeWindowFunction(const string& name, int ct)
|
||||
WindowFunctionType::makeWindowFunction(const string& name, int ct, WindowFunctionColumn* wc)
|
||||
{
|
||||
boost::shared_ptr<WindowFunctionType> af;
|
||||
int functionId = windowFunctionId[algorithm::to_upper_copy(name)];
|
||||
// The template parameters here are dummies to execute the static makeFunction
|
||||
// which sets the real type based on ct.
|
||||
switch (functionId)
|
||||
{
|
||||
case WF__COUNT_ASTERISK:
|
||||
@ -192,6 +196,9 @@ boost::shared_ptr<WindowFunctionType>
|
||||
case WF__PERCENTILE_DISC:
|
||||
af = WF_percentile<int64_t>::makeFunction(functionId, name, ct);
|
||||
break;
|
||||
case WF__UDAF:
|
||||
af = WF_udaf<int64_t>::makeFunction(functionId, name, ct, wc->getUDAFContext());
|
||||
break;
|
||||
case WF__REGR_SLOPE:
|
||||
case WF__REGR_INTERCEPT:
|
||||
case WF__REGR_COUNT:
|
||||
@ -211,7 +218,6 @@ boost::shared_ptr<WindowFunctionType>
|
||||
return af;
|
||||
}
|
||||
|
||||
|
||||
const string WindowFunctionType::toString() const
|
||||
{
|
||||
ostringstream oss;
|
||||
@ -223,77 +229,81 @@ const string WindowFunctionType::toString() const
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
|
||||
template<typename T> void WindowFunctionType::getValue(uint64_t i, T& t)
|
||||
template<typename T> void WindowFunctionType::getValue(uint64_t i, T& t, CDT* cdt)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template<> void WindowFunctionType::getValue<int64_t>(uint64_t i, int64_t& t)
|
||||
template<> void WindowFunctionType::getValue<int64_t>(uint64_t i, int64_t& t, CDT* cdt)
|
||||
{
|
||||
t = fRow.getIntField(i);
|
||||
if (cdt)
|
||||
{
|
||||
*cdt = execplan::CalpontSystemCatalog::BIGINT;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<> void WindowFunctionType::getValue<uint64_t>(uint64_t i, uint64_t& t)
|
||||
template<> void WindowFunctionType::getValue<uint64_t>(uint64_t i, uint64_t& t, CDT* cdt)
|
||||
{
|
||||
t = fRow.getUintField(i);
|
||||
if (cdt)
|
||||
{
|
||||
*cdt = execplan::CalpontSystemCatalog::UBIGINT;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<> void WindowFunctionType::getValue<double>(uint64_t i, double& t)
|
||||
template<> void WindowFunctionType::getValue<double>(uint64_t i, double& t, CDT* cdt)
|
||||
{
|
||||
t = fRow.getDoubleField(i);
|
||||
if (cdt)
|
||||
{
|
||||
*cdt = execplan::CalpontSystemCatalog::DOUBLE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<> void WindowFunctionType::getValue<float>(uint64_t i, float& t)
|
||||
template<> void WindowFunctionType::getValue<float>(uint64_t i, float& t, CDT* cdt)
|
||||
{
|
||||
t = fRow.getFloatField(i);
|
||||
if (cdt)
|
||||
{
|
||||
*cdt = execplan::CalpontSystemCatalog::FLOAT;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<> void WindowFunctionType::getValue<string>(uint64_t i, string& t)
|
||||
template<> void WindowFunctionType::getValue<string>(uint64_t i, string& t, CDT* cdt)
|
||||
{
|
||||
t = fRow.getStringField(i);
|
||||
// By not setting cdt, we let it default to the column's type
|
||||
}
|
||||
|
||||
|
||||
template<typename T> void WindowFunctionType::setValue(uint64_t i, T& t)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template<> void WindowFunctionType::setValue<int64_t>(uint64_t i, int64_t& t)
|
||||
{
|
||||
fRow.setIntField(t, i);
|
||||
}
|
||||
|
||||
|
||||
template<> void WindowFunctionType::setValue<uint64_t>(uint64_t i, uint64_t& t)
|
||||
{
|
||||
fRow.setUintField(t, i);
|
||||
}
|
||||
|
||||
|
||||
template<> void WindowFunctionType::setValue<double>(uint64_t i, double& t)
|
||||
{
|
||||
fRow.setDoubleField(t, i);
|
||||
}
|
||||
|
||||
|
||||
template<> void WindowFunctionType::setValue<float>(uint64_t i, float& t)
|
||||
{
|
||||
fRow.setFloatField(t, i);
|
||||
}
|
||||
|
||||
|
||||
template<> void WindowFunctionType::setValue<string>(uint64_t i, string& t)
|
||||
{
|
||||
fRow.setStringField(t, i);
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
void WindowFunctionType::setValue(int ct, int64_t b, int64_t e, int64_t c, T* v)
|
||||
{
|
||||
@ -314,7 +324,6 @@ void WindowFunctionType::setValue(int ct, int64_t b, int64_t e, int64_t c, T* v)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
void WindowFunctionType::implicit2T(uint64_t i, T& t, int s)
|
||||
{
|
||||
@ -384,55 +393,47 @@ void WindowFunctionType::implicit2T(uint64_t i, T& t, int s)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
void WindowFunctionType::implicit2T<string>(uint64_t i, string& t, int)
|
||||
{
|
||||
t = fRow.getStringField(i);
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
void WindowFunctionType::getConstValue(ConstantColumn* cc, T& t, bool& b)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
void WindowFunctionType::getConstValue<int64_t>(ConstantColumn* cc, int64_t& t, bool& b)
|
||||
{
|
||||
t = cc->getIntVal(fRow, b);
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
void WindowFunctionType::getConstValue<uint64_t>(ConstantColumn* cc, uint64_t& t, bool& b)
|
||||
{
|
||||
t = cc->getUintVal(fRow, b);
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
void WindowFunctionType::getConstValue<double>(ConstantColumn* cc, double& t, bool& b)
|
||||
{
|
||||
t = cc->getDoubleVal(fRow, b);
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
void WindowFunctionType::getConstValue<float>(ConstantColumn* cc, float& t, bool& b)
|
||||
{
|
||||
t = cc->getFloatVal(fRow, b);
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
void WindowFunctionType::getConstValue<string>(ConstantColumn* cc, string& t, bool& b)
|
||||
{
|
||||
t = cc->getStrVal(fRow, b);
|
||||
}
|
||||
|
||||
|
||||
template void WindowFunctionType::implicit2T<int64_t>(uint64_t, int64_t&, int);
|
||||
template void WindowFunctionType::implicit2T<uint64_t>(uint64_t, uint64_t&, int);
|
||||
template void WindowFunctionType::implicit2T<float>(uint64_t, float&, int);
|
||||
@ -445,7 +446,6 @@ template void WindowFunctionType::setValue<double>(int, int64_t, int64_t, int64_
|
||||
template void WindowFunctionType::setValue<string>(int, int64_t, int64_t, int64_t, string*);
|
||||
|
||||
|
||||
|
||||
void* WindowFunctionType::getNullValueByType(int ct, int pos)
|
||||
{
|
||||
static uint64_t bigIntNull = joblist::BIGINTNULL;
|
||||
@ -566,7 +566,6 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos)
|
||||
return v;
|
||||
}
|
||||
|
||||
|
||||
} //namespace
|
||||
// vim:ts=4 sw=4:
|
||||
|
||||
|
11
utils/windowfunction/windowfunctiontype.h
Normal file → Executable file
11
utils/windowfunction/windowfunctiontype.h
Normal file → Executable file
@ -98,8 +98,9 @@ const int WF__REGR_AVGY = 32;
|
||||
const int WF__REGR_SXX = 33;
|
||||
const int WF__REGR_SXY = 34;
|
||||
const int WF__REGR_SYY = 35;
|
||||
const int WF__UDAF = 36;
|
||||
|
||||
|
||||
typedef execplan::CalpontSystemCatalog::ColDataType CDT;
|
||||
|
||||
/** @brief class WindowFunction
|
||||
*
|
||||
@ -129,6 +130,10 @@ public:
|
||||
// @brief virtual parseParms()
|
||||
virtual void parseParms(const std::vector<execplan::SRCP>&) {}
|
||||
|
||||
// @brief virtual dropValues() For UDAnF functions
|
||||
// return false if there's no dropValue() implemented in the function.
|
||||
virtual bool dropValues(int64_t, int64_t) {return false;}
|
||||
|
||||
// @brief virtual display method
|
||||
virtual const std::string toString() const;
|
||||
|
||||
@ -148,14 +153,14 @@ public:
|
||||
void peer(const boost::shared_ptr<ordering::EqualCompData>& p) { fPeer = p; }
|
||||
void setCallback(joblist::WindowFunctionStep* step) { fStep = step; }
|
||||
|
||||
static boost::shared_ptr<WindowFunctionType> makeWindowFunction(const std::string&, int ct);
|
||||
static boost::shared_ptr<WindowFunctionType> makeWindowFunction(const std::string&, int ct, WindowFunctionColumn* wc);
|
||||
|
||||
protected:
|
||||
|
||||
static std::map<std::string, int> windowFunctionId;
|
||||
|
||||
// utility methods
|
||||
template<typename T> void getValue(uint64_t, T&);
|
||||
template<typename T> void getValue(uint64_t, T&, CDT* cdt = NULL);
|
||||
template<typename T> void setValue(int, int64_t, int64_t, int64_t, T* = NULL);
|
||||
template<typename T> void setValue(uint64_t, T&);
|
||||
template<typename T> void implicit2T(uint64_t, T&, int);
|
||||
|
Reference in New Issue
Block a user