mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-06-09 06:41:19 +03:00
635 lines
21 KiB
C++
635 lines
21 KiB
C++
/* Copyright (C) 2019 MariaDB Corporation
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
#include <sstream>
|
|
#include <cstring>
|
|
#include <typeinfo>
|
|
#include "moda.h"
|
|
#include "bytestream.h"
|
|
#include "objectreader.h"
|
|
#include "columnwidth.h"
|
|
|
|
using namespace mcsv1sdk;
|
|
|
|
// This is the standard way to get a UDAF function into the system's
|
|
// map of UDAF for lookup
|
|
class Add_moda_ToUDAFMap
|
|
{
|
|
public:
|
|
Add_moda_ToUDAFMap()
|
|
{
|
|
UDAFMap::getMap()["moda"] = new moda();
|
|
}
|
|
};
|
|
static Add_moda_ToUDAFMap addToMap;
|
|
|
|
// There are a few design options when creating a generic moda function:
|
|
// 1) Always use DOUBLE for internal storage
|
|
// Pros: can handle data from any native SQL type.
|
|
// Cons: If MODA(SUM()) is called, then the LONG DOUBLE returned by SUM will
|
|
// be truncated.
|
|
// It requires 8 bytes in the hash table and requires streaming 8 bytes
|
|
// per entry regardless of how small it could have been.
|
|
// 2) Always use LONG DOUBLE for internal storage
|
|
// Pros: Solves the problem of MODA(SUM())
|
|
// Cons: It requires 16 bytes in the hash table and requires streaming 16 bytes
|
|
// per entry regardless of how small it could have been.
|
|
// 3) Use the data type of the column for internal storage
|
|
// Pros: Can handle MODA(SUM()) because all types, including LONG DOUBLE, are handled
|
|
// Only the data size needed is stored in the hash table and streamed
|
|
//
|
|
// This class implements option 3 by creating templated classes.
|
|
// There are two moda classes, the main one called moda, which is basically
|
|
// an adapter (Pattern) to the templated class called Moda_impl_T.
|
|
//
|
|
// The way the API works, each function class is instantiated exactly once per
|
|
// executable and then accessed via a map. This means that the function classes
|
|
// could be used by any active query, or more than once by a single query. These
|
|
// classes have no data fields for this reason. All data for a specific query is
|
|
// maintained by the context object.
|
|
//
|
|
// Each possible templated instantiation is created at moda creation during startup.
|
|
// They are the Moda_impl_T members at the bottom of the moda class definition.
|
|
// At runtime getImpl() gets the right one for the datatype involved based on context.
|
|
//
|
|
// More template magic is done in the ModaData class to create and maintain
|
|
// a hash of the correct type.
|
|
|
|
// getImpl returns the current modaImpl or gets the correct one based on context.
|
|
// Returns the type-specific implementation for this query, selecting it on
// first use and caching it in the ModaData attached to the context.
//
// The choice is driven by context->getResultType(); DECIMAL/UDECIMAL are
// further dispatched on context->getColWidth(). Returns NULL when the result
// type has no matching implementation.
mcsv1_UDAF* moda::getImpl(mcsv1Context* context)
{
  typedef execplan::CalpontSystemCatalog CSC;

  ModaData* userData = static_cast<ModaData*>(context->getUserData());

  // Already resolved for this user-data object: reuse the cached pointer.
  if (userData->modaImpl)
  {
    return userData->modaImpl;
  }

  switch (context->getResultType())
  {
    // Signed integer types.
    case CSC::TINYINT:
      userData->modaImpl = &moda_impl_int8;
      break;

    case CSC::SMALLINT:
      userData->modaImpl = &moda_impl_int16;
      break;

    case CSC::MEDINT:
    case CSC::INT:
      userData->modaImpl = &moda_impl_int32;
      break;

    case CSC::BIGINT:
      userData->modaImpl = &moda_impl_int64;
      break;

    // Decimals use the signed-integer implementation matching the column
    // width. An unrecognized width leaves modaImpl as it was (unset here).
    case CSC::DECIMAL:
    case CSC::UDECIMAL:
      switch (context->getColWidth())
      {
        case 1:
          userData->modaImpl = &moda_impl_int8;
          break;

        case 2:
          userData->modaImpl = &moda_impl_int16;
          break;

        case 4:
          userData->modaImpl = &moda_impl_int32;
          break;

        case 8:
          userData->modaImpl = &moda_impl_int64;
          break;

        case 16:
          userData->modaImpl = &moda_impl_int128;
          break;
      }
      break;

    // Unsigned integer types.
    case CSC::UTINYINT:
      userData->modaImpl = &moda_impl_uint8;
      break;

    case CSC::USMALLINT:
      userData->modaImpl = &moda_impl_uint16;
      break;

    case CSC::UMEDINT:
    case CSC::UINT:
      userData->modaImpl = &moda_impl_uint32;
      break;

    case CSC::UBIGINT:
      userData->modaImpl = &moda_impl_uint64;
      break;

    // Floating point types.
    case CSC::FLOAT:
      userData->modaImpl = &moda_impl_float;
      break;

    case CSC::DOUBLE:
      userData->modaImpl = &moda_impl_double;
      break;

    case CSC::LONGDOUBLE:
      userData->modaImpl = &moda_impl_longdouble;
      break;

    // Character types.
    case CSC::VARCHAR:
    case CSC::CHAR:
      userData->modaImpl = &moda_impl_string;
      break;

    default:
      userData->modaImpl = NULL;
      break;
  }

  return userData->modaImpl;
}
|
|
|
|
// Validates the moda() call, records the result type (and, for decimals, the
// column width, scale and precision) on the context, then delegates to the
// type-specific implementation's init().
//
// Returns mcsv1_UDAF::ERROR, with an error message set on the context, when
// the parameter count is not exactly one, when the argument is neither
// numeric nor CHAR/VARCHAR, or when getImpl() finds no implementation.
mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes)
{
  typedef execplan::CalpontSystemCatalog CSC;

  // Every error message set below will be prepended with
  // "The storage engine for the table doesn't support "
  const auto paramCount = context->getParameterCount();

  if (paramCount < 1)
  {
    context->setErrorMessage("moda() with 0 arguments");
    return mcsv1_UDAF::ERROR;
  }

  if (paramCount > 1)
  {
    context->setErrorMessage("moda() with more than 1 argument");
    return mcsv1_UDAF::ERROR;
  }

  const auto& column = colTypes[0];

  // Only numeric and character arguments are accepted.
  const bool isCharacter = column.dataType == datatypes::SystemCatalog::VARCHAR ||
                           column.dataType == datatypes::SystemCatalog::CHAR;

  if (!datatypes::isNumeric(column.dataType) && !isCharacter)
  {
    context->setErrorMessage("moda() with invalid argument");
    return mcsv1_UDAF::ERROR;
  }

  if (column.dataType == CSC::DECIMAL || column.dataType == CSC::UDECIMAL)
  {
    // Pick the storage width from the declared precision.
    const auto precision = column.precision;

    if (precision < 3)
    {
      context->setColWidth(1);
    }
    else if (precision < 4)
    {
      context->setColWidth(2);
    }
    else if (precision < 9)
    {
      context->setColWidth(4);
    }
    else if (precision < 19)
    {
      context->setColWidth(8);
    }
    else if (utils::widthByPrecision(precision))
    {
      context->setColWidth(16);
    }

    context->setScale(column.scale);
    context->setPrecision(column.precision);
  }

  // The result has the same type as the argument; getImpl() dispatches on it.
  context->setResultType(column.dataType);

  mcsv1_UDAF* typedImpl = getImpl(context);

  if (!typedImpl)
  {
    context->setErrorMessage("moda() with implementation not found for data type");
    return mcsv1_UDAF::ERROR;
  }

  context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
  return typedImpl->init(context, colTypes);
}
|
|
|
|
template <class T>
|
|
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::init(mcsv1Context* context, ColumnDatum* colTypes)
|
|
{
|
|
if (!(colTypes[0].dataType == execplan::CalpontSystemCatalog::DECIMAL ||
|
|
colTypes[0].dataType == execplan::CalpontSystemCatalog::UDECIMAL))
|
|
{
|
|
context->setColWidth(sizeof(T));
|
|
context->setScale(0);
|
|
context->setPrecision(0);
|
|
}
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
template <class T>
|
|
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::reset(mcsv1Context* context)
|
|
{
|
|
ModaData* data = static_cast<ModaData*>(context->getUserData());
|
|
data->fReturnType = context->getResultType();
|
|
data->fColWidth = context->getColWidth();
|
|
data->clear<T>();
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
template <class T>
|
|
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
|
|
{
|
|
static_any::any& valIn = valsIn[0].columnData;
|
|
ModaData* data = static_cast<ModaData*>(context->getUserData());
|
|
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = data->getMap<T>();
|
|
|
|
if (valIn.empty())
|
|
{
|
|
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
|
|
}
|
|
|
|
T val = convertAnyTo<T>(valIn);
|
|
|
|
if (context->getResultType() == execplan::CalpontSystemCatalog::DOUBLE)
|
|
{
|
|
// For decimal types, we need to move the decimal point.
|
|
uint32_t scale = valsIn[0].scale;
|
|
|
|
if (val != 0 && scale > 0)
|
|
{
|
|
val /= datatypes::scaleDivisor<double>(scale);
|
|
}
|
|
}
|
|
|
|
data->fSum += val;
|
|
++data->fCount;
|
|
(*map)[val]++;
|
|
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
template <class T>
|
|
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
|
|
{
|
|
if (!userDataIn)
|
|
{
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
ModaData* outData = static_cast<ModaData*>(context->getUserData());
|
|
const ModaData* inData = static_cast<const ModaData*>(userDataIn);
|
|
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* outMap = outData->getMap<T>();
|
|
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* inMap = inData->getMap<T>();
|
|
typename std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >::const_iterator iter;
|
|
|
|
for (iter = inMap->begin(); iter != inMap->end(); ++iter)
|
|
{
|
|
(*outMap)[iter->first] += iter->second;
|
|
}
|
|
// AVG
|
|
outData->fSum += inData->fSum;
|
|
outData->fCount += inData->fCount;
|
|
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
template <class T>
|
|
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::evaluate(mcsv1Context* context, static_any::any& valOut)
|
|
{
|
|
uint64_t maxCnt = 0;
|
|
long double avg = 0;
|
|
T val = 0;
|
|
ModaData* data = static_cast<ModaData*>(context->getUserData());
|
|
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = data->getMap<T>();
|
|
|
|
if (map->size() == 0)
|
|
{
|
|
valOut = (T)0;
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
avg = data->fCount ? data->fSum / data->fCount : 0;
|
|
typename std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >::iterator iter;
|
|
|
|
for (iter = map->begin(); iter != map->end(); ++iter)
|
|
{
|
|
if (iter->second > maxCnt)
|
|
{
|
|
val = iter->first;
|
|
maxCnt = iter->second;
|
|
}
|
|
else if (iter->second == maxCnt)
|
|
{
|
|
T absval = val >= 0 ? val : -val;
|
|
T absfirst = iter->first >= 0 ? iter->first : -iter->first;
|
|
// Tie breaker: choose the closest to avg. If still tie, choose smallest
|
|
long double dist1 = val > avg ? (long double)val - avg : avg - (long double)val;
|
|
long double dist2 = iter->first > avg ? (long double)iter->first - avg : avg - (long double)iter->first;
|
|
if ((dist1 > dist2) || ((dist1 == dist2) && (absval > absfirst)))
|
|
{
|
|
val = iter->first;
|
|
}
|
|
}
|
|
}
|
|
|
|
// If scale is > 0, then the original type was DECIMAL. Set the
|
|
// ResultType to DECIMAL so the delivery logic moves the decimal point.
|
|
if (context->getScale() > 0)
|
|
context->setResultType(execplan::CalpontSystemCatalog::DECIMAL);
|
|
|
|
valOut = val;
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
template <class T>
|
|
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
|
|
{
|
|
static_any::any& valDropped = valsDropped[0].columnData;
|
|
ModaData* data = static_cast<ModaData*>(context->getUserData());
|
|
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = data->getMap<T>();
|
|
|
|
if (valDropped.empty())
|
|
{
|
|
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
|
|
}
|
|
|
|
T val = convertAnyTo<T>(valDropped);
|
|
|
|
data->fSum -= val;
|
|
--data->fCount;
|
|
(*map)[val]--;
|
|
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
// Writes fReturnType, fSum, fCount and fColWidth to the stream, followed by
// the value/count map serialized with the element type selected by
// fReturnType (and by fColWidth for DECIMAL/UDECIMAL).
//
// Throws std::runtime_error when fReturnType is not a supported type. A
// DECIMAL/UDECIMAL width other than 1, 2, 4, 8 or 16 writes no map.
void ModaData::serialize(messageqcpp::ByteStream& bs) const
{
  typedef execplan::CalpontSystemCatalog CSC;

  bs << fReturnType;
  bs << fSum;
  bs << fCount;
  bs << fColWidth;

  switch (static_cast<CSC::ColDataType>(fReturnType))
  {
    case CSC::TINYINT:
      serializeMap<int8_t>(bs);
      break;

    case CSC::SMALLINT:
      serializeMap<int16_t>(bs);
      break;

    case CSC::MEDINT:
    case CSC::INT:
      serializeMap<int32_t>(bs);
      break;

    case CSC::BIGINT:
      serializeMap<int64_t>(bs);
      break;

    case CSC::DECIMAL:
    case CSC::UDECIMAL:
      // Decimals are keyed by the signed integer of matching width.
      switch (fColWidth)
      {
        case 1:
          serializeMap<int8_t>(bs);
          break;

        case 2:
          serializeMap<int16_t>(bs);
          break;

        case 4:
          serializeMap<int32_t>(bs);
          break;

        case 8:
          serializeMap<int64_t>(bs);
          break;

        case 16:
          serializeMap<int128_t>(bs);
          break;
      }
      break;

    case CSC::UTINYINT:
      serializeMap<uint8_t>(bs);
      break;

    case CSC::USMALLINT:
      serializeMap<uint16_t>(bs);
      break;

    case CSC::UMEDINT:
    case CSC::UINT:
      serializeMap<uint32_t>(bs);
      break;

    case CSC::UBIGINT:
      serializeMap<uint64_t>(bs);
      break;

    case CSC::FLOAT:
      serializeMap<float>(bs);
      break;

    case CSC::DOUBLE:
      serializeMap<double>(bs);
      break;

    case CSC::LONGDOUBLE:
      serializeMap<long double>(bs);
      break;

    case CSC::CHAR:
    case CSC::VARCHAR:
      serializeMap<string>(bs);
      break;

    default:
      throw std::runtime_error("ModaData::serialize with bad data type");
  }
}
|
|
|
|
// Reads fReturnType, fSum, fCount and fColWidth from the stream, then reads
// the value/count map using the element type selected by fReturnType (and by
// fColWidth for DECIMAL/UDECIMAL).
//
// Throws std::runtime_error when the fReturnType read from the stream is not
// a supported type. A DECIMAL/UDECIMAL width other than 1, 2, 4, 8 or 16
// reads no map.
void ModaData::unserialize(messageqcpp::ByteStream& bs)
{
  typedef execplan::CalpontSystemCatalog CSC;

  bs >> fReturnType;
  bs >> fSum;
  bs >> fCount;
  bs >> fColWidth;

  switch (static_cast<CSC::ColDataType>(fReturnType))
  {
    case CSC::TINYINT:
      unserializeMap<int8_t>(bs);
      break;

    case CSC::SMALLINT:
      unserializeMap<int16_t>(bs);
      break;

    case CSC::MEDINT:
    case CSC::INT:
      unserializeMap<int32_t>(bs);
      break;

    case CSC::BIGINT:
      unserializeMap<int64_t>(bs);
      break;

    case CSC::DECIMAL:
    case CSC::UDECIMAL:
      // Decimals are keyed by the signed integer of matching width.
      switch (fColWidth)
      {
        case 1:
          unserializeMap<int8_t>(bs);
          break;

        case 2:
          unserializeMap<int16_t>(bs);
          break;

        case 4:
          unserializeMap<int32_t>(bs);
          break;

        case 8:
          unserializeMap<int64_t>(bs);
          break;

        case 16:
          unserializeMap<int128_t>(bs);
          break;
      }
      break;

    case CSC::UTINYINT:
      unserializeMap<uint8_t>(bs);
      break;

    case CSC::USMALLINT:
      unserializeMap<uint16_t>(bs);
      break;

    case CSC::UMEDINT:
    case CSC::UINT:
      unserializeMap<uint32_t>(bs);
      break;

    case CSC::UBIGINT:
      unserializeMap<uint64_t>(bs);
      break;

    case CSC::FLOAT:
      unserializeMap<float>(bs);
      break;

    case CSC::DOUBLE:
      unserializeMap<double>(bs);
      break;

    case CSC::LONGDOUBLE:
      unserializeMap<long double>(bs);
      break;

    case CSC::CHAR:
    case CSC::VARCHAR:
      unserializeMap<string>(bs);
      break;

    default:
      throw std::runtime_error("ModaData::unserialize with bad data type");
  }
}
|
|
|
|
void ModaData::cleanup()
|
|
{
|
|
if (!fMap)
|
|
return;
|
|
switch ((execplan::CalpontSystemCatalog::ColDataType)fReturnType)
|
|
{
|
|
case execplan::CalpontSystemCatalog::TINYINT:
|
|
clear<int8_t>();
|
|
deleteMap<int8_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::SMALLINT:
|
|
clear<int16_t>();
|
|
deleteMap<int16_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::MEDINT:
|
|
case execplan::CalpontSystemCatalog::INT:
|
|
clear<int32_t>();
|
|
deleteMap<int32_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::BIGINT:
|
|
clear<int64_t>();
|
|
deleteMap<int64_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::DECIMAL:
|
|
case execplan::CalpontSystemCatalog::UDECIMAL:
|
|
switch (fColWidth)
|
|
{
|
|
case 1:
|
|
clear<int8_t>();
|
|
deleteMap<int8_t>();
|
|
break;
|
|
case 2:
|
|
clear<int16_t>();
|
|
deleteMap<int16_t>();
|
|
break;
|
|
case 4:
|
|
clear<int32_t>();
|
|
deleteMap<int32_t>();
|
|
break;
|
|
case 8:
|
|
clear<int64_t>();
|
|
deleteMap<int64_t>();
|
|
break;
|
|
case 16:
|
|
clear<int128_t>();
|
|
deleteMap<int128_t>();
|
|
break;
|
|
}
|
|
break;
|
|
case execplan::CalpontSystemCatalog::UTINYINT:
|
|
clear<uint8_t>();
|
|
deleteMap<uint8_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::USMALLINT:
|
|
clear<uint16_t>();
|
|
deleteMap<uint16_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::UMEDINT:
|
|
case execplan::CalpontSystemCatalog::UINT:
|
|
clear<uint32_t>();
|
|
deleteMap<uint32_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::UBIGINT:
|
|
clear<uint64_t>();
|
|
deleteMap<uint64_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::FLOAT:
|
|
clear<float>();
|
|
deleteMap<float>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::DOUBLE:
|
|
clear<double>();
|
|
deleteMap<double>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::LONGDOUBLE:
|
|
clear<long double>();
|
|
deleteMap<long double>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::CHAR:
|
|
case execplan::CalpontSystemCatalog::VARCHAR:
|
|
clear<string>();
|
|
deleteMap<string>();
|
|
break;
|
|
default: throw std::runtime_error("ModaData::cleanup with bad data type"); break;
|
|
}
|
|
}
|
|
|
|
/************************************************************************************************
|
|
* String Specialization
|
|
************************************************************************************************/
|
|
|
|
// String init: configures the member charset object `cs` from the charset
// number carried by the context. The column types argument is unused.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::init(mcsv1Context* context, ColumnDatum* /*colTypes*/)
{
  const auto charsetNumber = context->getCharsetNumber();
  cs.setCharset(charsetNumber);

  return mcsv1_UDAF::SUCCESS;
}
|
|
|
|
// Copies the result type, column width and charset number from the context
// into the per-query ModaData and then clears its string-typed state.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::reset(mcsv1Context* context)
{
  ModaData* state = static_cast<ModaData*>(context->getUserData());

  state->fCs_num = context->getCharsetNumber();
  state->fColWidth = context->getColWidth();
  state->fReturnType = context->getResultType();
  state->clear<string>();

  return mcsv1_UDAF::SUCCESS;
}
|
|
|
|
// Accumulates one string input by bumping its occurrence counter in the
// value -> count map. Empty inputs and NULL strings are ignored.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
  static_any::any& input = valsIn[0].columnData;
  ModaData* state = static_cast<ModaData*>(context->getUserData());
  // Fetched before the empty check, matching the established call order.
  auto* counts = state->getMap<string>();

  // Ought not happen when UDAF_IGNORE_NULLS is on.
  if (input.empty())
  {
    return mcsv1_UDAF::SUCCESS;
  }

  // Only extract the payload when the any actually holds the string type.
  utils::NullString text;

  if (input.compatible(strTypeId))
  {
    text = input.cast<utils::NullString>();
  }

  // Ought not happen when UDAF_IGNORE_NULLS is on.
  if (text.isNull())
  {
    return mcsv1_UDAF::SUCCESS;
  }

  ++(*counts)[text.safeString("")];

  return mcsv1_UDAF::SUCCESS;
}
|
|
|
|
// Merges a partial aggregate (userDataIn) into the context's own ModaData by
// adding the per-string occurrence counts. A null userDataIn is a no-op.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
  if (!userDataIn)
  {
    return mcsv1_UDAF::SUCCESS;
  }

  ModaData* target = static_cast<ModaData*>(context->getUserData());
  const ModaData* source = static_cast<const ModaData*>(userDataIn);
  auto* targetCounts = target->getMap<string>();
  auto* sourceCounts = source->getMap<string>();

  for (const auto& entry : *sourceCounts)
  {
    (*targetCounts)[entry.first] += entry.second;
  }

  return mcsv1_UDAF::SUCCESS;
}
|
|
|
|
// Produces the mode of the accumulated strings: the string with the highest
// occurrence count. Ties are broken by choosing the smallest string
// according to the collation configured in `cs`. With no values recorded,
// the result is a default-constructed utils::NullString.
//
// The tie-break compares each candidate against the CURRENT best value.
// Comparing against the last strictly-more-frequent value instead lets a
// later, larger string overwrite a smaller one already chosen, making the
// result depend on hash-map iteration order when three or more strings tie.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::evaluate(mcsv1Context* context, static_any::any& valOut)
{
  uint64_t maxCnt = 0;
  string val;
  ModaData* data = static_cast<ModaData*>(context->getUserData());
  std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >* map = data->getMap<string>();

  if (map->size() == 0)
  {
    valOut = utils::NullString();
    return mcsv1_UDAF::SUCCESS;
  }

  typename std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >::iterator iter;

  for (iter = map->begin(); iter != map->end(); ++iter)
  {
    if (iter->second > maxCnt)
    {
      // Strictly more frequent: new leader.
      val = iter->first;
      maxCnt = iter->second;
    }
    else if (iter->second == maxCnt)
    {
      // Tie breaker: choose smallest according to collation
      if (cs.strnncollsp(iter->first, val) < 0)
      {
        val = iter->first;
      }
    }
  }

  // If scale is > 0, then the original type was DECIMAL. Set the
  // ResultType to DECIMAL so the delivery logic moves the decimal point.
  // NOTE(review): this mirrors the numeric implementation; presumably the
  // scale is never > 0 for CHAR/VARCHAR arguments - confirm.
  if (context->getScale() > 0)
    context->setResultType(execplan::CalpontSystemCatalog::DECIMAL);

  utils::NullString ns(val);
  valOut = ns;
  return mcsv1_UDAF::SUCCESS;
}
|
|
|
|
// Removes one previously accumulated string from the aggregate state.
//
// The dropped value is decoded the same way nextValue() decodes its input:
// the any is cast to utils::NullString only when it holds the string type,
// and NULL strings are ignored. This replaces the previous unconditional
// idbassert, which flagged the body as not accounting for NullString.
//
// fCount is not adjusted here because the string nextValue() never
// increments it. The counter is only decremented for keys that are present
// (operator[] on an absent key would insert a zero counter and wrap it to
// UINT32_MAX), and entries that reach zero are erased so that evaluate()
// never considers them.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
  static_any::any& valDropped = valsDropped[0].columnData;
  ModaData* data = static_cast<ModaData*>(context->getUserData());
  std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >* map = data->getMap<string>();

  if (valDropped.empty())
  {
    return mcsv1_UDAF::SUCCESS;  // Ought not happen when UDAF_IGNORE_NULLS is on.
  }

  utils::NullString val;
  if (valDropped.compatible(strTypeId))
    val = valDropped.cast<utils::NullString>();

  if (val.isNull())
  {
    return mcsv1_UDAF::SUCCESS;  // Ought not happen when UDAF_IGNORE_NULLS is on.
  }

  std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >::iterator iter =
      map->find(val.safeString(""));

  if (iter == map->end())
  {
    // The value was never counted, so there is nothing to undo.
    return mcsv1_UDAF::SUCCESS;
  }

  if (iter->second <= 1)
  {
    map->erase(iter);
  }
  else
  {
    --iter->second;
  }

  return mcsv1_UDAF::SUCCESS;
}
|
|
|
|
|