mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-04-18 21:44:02 +03:00
This patch improves handling of NULLs in textual fields in ColumnStore. Previously, empty strings were treated as NULLs, which was a problem if the data schema allows empty strings. It was also one of the major reasons for behavioral differences between ColumnStore and other engines in the MariaDB family. This patch also fixes some other bugs and incorrect behavior, for example the incorrect comparison for "column <= ''", which evaluated to constant True for all purposes before this patch.
635 lines
21 KiB
C++
635 lines
21 KiB
C++
/* Copyright (C) 2019 MariaDB Corporation
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
#include <sstream>
|
|
#include <cstring>
|
|
#include <typeinfo>
|
|
#include "moda.h"
|
|
#include "bytestream.h"
|
|
#include "objectreader.h"
|
|
#include "columnwidth.h"
|
|
|
|
using namespace mcsv1sdk;
|
|
|
|
// This is the standard way to get a UDAF function into the system's
|
|
// map of UDAF for lookup
|
|
class Add_moda_ToUDAFMap
|
|
{
|
|
public:
|
|
Add_moda_ToUDAFMap()
|
|
{
|
|
UDAFMap::getMap()["moda"] = new moda();
|
|
}
|
|
};
|
|
static Add_moda_ToUDAFMap addToMap;
|
|
|
|
// There are a few design options when creating a generic moda function:
|
|
// 1) Always use DOUBLE for internal storage
|
|
// Pros: can handle data from any native SQL type.
|
|
// Cons: If MODA(SUM()) is called, then the LONG DOUBLE returned by SUM will
|
|
// be truncated.
|
|
// It requires 8 bytes in the hash table and requires streaming 8 bytes
|
|
// per entry regardless of how small it could have been.
|
|
// 2) Always use LONG DOUBLE for internal storage
|
|
// Pros: Solves the problem of MODA(SUM())
|
|
// Cons: It requires 16 bytes in the hash table and requires streaming 16 bytes
|
|
// per entry regardless of how small it could have been.
|
|
// 3) Use the data type of the column for internal storage
|
|
// Pros: Can handle MODA(SUM()) because all types, including LONG DOUBLE, are handled
|
|
// Only the data size needed is stored in the hash table and streamed
|
|
//
|
|
// This class implements option 3 by creating templated classes.
|
|
// There are two moda classes, the main one called moda, which is basically
|
|
// an adapter (Pattern) to the templated class called Moda_impl_T.
|
|
//
|
|
// The way the API works, each function class is instantiated exactly once per
|
|
// executable and then accessed via a map. This means that the function classes
|
|
// could be used by any active query, or more than once by a single query. These
|
|
// classes have no data fields for this reason. All data for a specific query is
|
|
// maintained by the context object.
|
|
//
|
|
// Each possible templated instantiation is created at moda creation during startup.
|
|
// They are the Moda_impl_T members at the bottom of the moda class definition.
|
|
// At runtime getImpl() gets the right one for the datatype involved based on context.
|
|
//
|
|
// More template magic is done in the ModaData class to create and maintain
|
|
// a hash of the correct type.
|
|
|
|
// getImpl returns the current modaImpl or gets the correct one based on context.
|
|
// Returns the type-specific implementation for this context. The choice is
// stored in the context's ModaData, so the dispatch below only runs while no
// implementation has been recorded yet. Yields NULL for unsupported types.
mcsv1_UDAF* moda::getImpl(mcsv1Context* context)
{
  using Catalog = execplan::CalpontSystemCatalog;

  ModaData* userData = static_cast<ModaData*>(context->getUserData());
  auto& impl = userData->modaImpl;

  // Already resolved on an earlier call.
  if (impl)
  {
    return impl;
  }

  const auto resultType = context->getResultType();

  if (resultType == Catalog::DECIMAL || resultType == Catalog::UDECIMAL)
  {
    // Decimals use the signed integer implementation matching the column
    // width. A width that is not listed leaves impl unset.
    switch (context->getColWidth())
    {
      case 1: impl = &moda_impl_int8; break;
      case 2: impl = &moda_impl_int16; break;
      case 4: impl = &moda_impl_int32; break;
      case 8: impl = &moda_impl_int64; break;
      case 16: impl = &moda_impl_int128; break;
    }

    return impl;
  }

  switch (resultType)
  {
    // Signed integers
    case Catalog::TINYINT: impl = &moda_impl_int8; break;
    case Catalog::SMALLINT: impl = &moda_impl_int16; break;
    case Catalog::MEDINT:
    case Catalog::INT: impl = &moda_impl_int32; break;
    case Catalog::BIGINT: impl = &moda_impl_int64; break;

    // Unsigned integers
    case Catalog::UTINYINT: impl = &moda_impl_uint8; break;
    case Catalog::USMALLINT: impl = &moda_impl_uint16; break;
    case Catalog::UMEDINT:
    case Catalog::UINT: impl = &moda_impl_uint32; break;
    case Catalog::UBIGINT: impl = &moda_impl_uint64; break;

    // Floating point
    case Catalog::FLOAT: impl = &moda_impl_float; break;
    case Catalog::DOUBLE: impl = &moda_impl_double; break;
    case Catalog::LONGDOUBLE: impl = &moda_impl_longdouble; break;

    // Text
    case Catalog::VARCHAR:
    case Catalog::CHAR: impl = &moda_impl_string; break;

    default: impl = NULL; break;
  }

  return impl;
}
|
|
|
|
// Validates the call (exactly one argument whose type is numeric, CHAR or
// VARCHAR), records width/scale/precision on the context for decimal
// arguments, selects the typed implementation and lets it finish the
// initialization.
//
// Every message passed to setErrorMessage() here will be prepended with
// "The storage engine for the table doesn't support ".
mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes)
{
  const auto argCount = context->getParameterCount();

  if (argCount < 1)
  {
    context->setErrorMessage("moda() with 0 arguments");
    return mcsv1_UDAF::ERROR;
  }

  if (argCount > 1)
  {
    context->setErrorMessage("moda() with more than 1 argument");
    return mcsv1_UDAF::ERROR;
  }

  const auto argType = colTypes[0].dataType;
  const bool isText =
      argType == datatypes::SystemCatalog::VARCHAR || argType == datatypes::SystemCatalog::CHAR;

  if (!datatypes::isNumeric(argType) && !isText)
  {
    context->setErrorMessage("moda() with invalid argument");
    return mcsv1_UDAF::ERROR;
  }

  const bool isDecimal = argType == execplan::CalpontSystemCatalog::DECIMAL ||
                         argType == execplan::CalpontSystemCatalog::UDECIMAL;

  if (isDecimal)
  {
    const auto precision = colTypes[0].precision;

    // Pick the storage width from the precision; 0 means "no width chosen",
    // in which case the context's width is left untouched.
    int width = 0;

    if (precision < 3)
      width = 1;
    else if (precision < 4)
      width = 2;
    else if (precision < 9)
      width = 4;
    else if (precision < 19)
      width = 8;
    else if (utils::widthByPrecision(precision))
      width = 16;

    if (width != 0)
    {
      context->setColWidth(width);
    }

    context->setScale(colTypes[0].scale);
    context->setPrecision(precision);
  }

  // The result has the same type as the argument; getImpl() dispatches on it.
  context->setResultType(argType);

  mcsv1_UDAF* typedImpl = getImpl(context);

  if (typedImpl == NULL)
  {
    context->setErrorMessage("moda() with implementation not found for data type");
    return mcsv1_UDAF::ERROR;
  }

  context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
  return typedImpl->init(context, colTypes);
}
|
|
|
|
template <class T>
|
|
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::init(mcsv1Context* context, ColumnDatum* colTypes)
|
|
{
|
|
if (!(colTypes[0].dataType == execplan::CalpontSystemCatalog::DECIMAL ||
|
|
colTypes[0].dataType == execplan::CalpontSystemCatalog::UDECIMAL))
|
|
{
|
|
context->setColWidth(sizeof(T));
|
|
context->setScale(0);
|
|
context->setPrecision(0);
|
|
}
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
template <class T>
|
|
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::reset(mcsv1Context* context)
|
|
{
|
|
ModaData* data = static_cast<ModaData*>(context->getUserData());
|
|
data->fReturnType = context->getResultType();
|
|
data->fColWidth = context->getColWidth();
|
|
data->clear<T>();
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
template <class T>
|
|
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
|
|
{
|
|
static_any::any& valIn = valsIn[0].columnData;
|
|
ModaData* data = static_cast<ModaData*>(context->getUserData());
|
|
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = data->getMap<T>();
|
|
|
|
if (valIn.empty())
|
|
{
|
|
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
|
|
}
|
|
|
|
T val = convertAnyTo<T>(valIn);
|
|
|
|
if (context->getResultType() == execplan::CalpontSystemCatalog::DOUBLE)
|
|
{
|
|
// For decimal types, we need to move the decimal point.
|
|
uint32_t scale = valsIn[0].scale;
|
|
|
|
if (val != 0 && scale > 0)
|
|
{
|
|
val /= datatypes::scaleDivisor<double>(scale);
|
|
}
|
|
}
|
|
|
|
data->fSum += val;
|
|
++data->fCount;
|
|
(*map)[val]++;
|
|
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
template <class T>
|
|
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
|
|
{
|
|
if (!userDataIn)
|
|
{
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
ModaData* outData = static_cast<ModaData*>(context->getUserData());
|
|
const ModaData* inData = static_cast<const ModaData*>(userDataIn);
|
|
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* outMap = outData->getMap<T>();
|
|
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* inMap = inData->getMap<T>();
|
|
typename std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >::const_iterator iter;
|
|
|
|
for (iter = inMap->begin(); iter != inMap->end(); ++iter)
|
|
{
|
|
(*outMap)[iter->first] += iter->second;
|
|
}
|
|
// AVG
|
|
outData->fSum += inData->fSum;
|
|
outData->fCount += inData->fCount;
|
|
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
template <class T>
|
|
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::evaluate(mcsv1Context* context, static_any::any& valOut)
|
|
{
|
|
uint64_t maxCnt = 0;
|
|
long double avg = 0;
|
|
T val = 0;
|
|
ModaData* data = static_cast<ModaData*>(context->getUserData());
|
|
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = data->getMap<T>();
|
|
|
|
if (map->size() == 0)
|
|
{
|
|
valOut = (T)0;
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
avg = data->fCount ? data->fSum / data->fCount : 0;
|
|
typename std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >::iterator iter;
|
|
|
|
for (iter = map->begin(); iter != map->end(); ++iter)
|
|
{
|
|
if (iter->second > maxCnt)
|
|
{
|
|
val = iter->first;
|
|
maxCnt = iter->second;
|
|
}
|
|
else if (iter->second == maxCnt)
|
|
{
|
|
T absval = val >= 0 ? val : -val;
|
|
T absfirst = iter->first >= 0 ? iter->first : -iter->first;
|
|
// Tie breaker: choose the closest to avg. If still tie, choose smallest
|
|
long double dist1 = val > avg ? (long double)val - avg : avg - (long double)val;
|
|
long double dist2 = iter->first > avg ? (long double)iter->first - avg : avg - (long double)iter->first;
|
|
if ((dist1 > dist2) || ((dist1 == dist2) && (absval > absfirst)))
|
|
{
|
|
val = iter->first;
|
|
}
|
|
}
|
|
}
|
|
|
|
// If scale is > 0, then the original type was DECIMAL. Set the
|
|
// ResultType to DECIMAL so the delivery logic moves the decimal point.
|
|
if (context->getScale() > 0)
|
|
context->setResultType(execplan::CalpontSystemCatalog::DECIMAL);
|
|
|
|
valOut = val;
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
template <class T>
|
|
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
|
|
{
|
|
static_any::any& valDropped = valsDropped[0].columnData;
|
|
ModaData* data = static_cast<ModaData*>(context->getUserData());
|
|
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = data->getMap<T>();
|
|
|
|
if (valDropped.empty())
|
|
{
|
|
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
|
|
}
|
|
|
|
T val = convertAnyTo<T>(valDropped);
|
|
|
|
data->fSum -= val;
|
|
--data->fCount;
|
|
(*map)[val]--;
|
|
|
|
return mcsv1_UDAF::SUCCESS;
|
|
}
|
|
|
|
// Writes this object to the byte stream: the four header fields
// (fReturnType, fSum, fCount, fColWidth) followed by the occurrence map,
// serialized with the element type implied by fReturnType (and by fColWidth
// for decimals). Throws std::runtime_error for an unsupported return type.
void ModaData::serialize(messageqcpp::ByteStream& bs) const
{
  using Catalog = execplan::CalpontSystemCatalog;

  bs << fReturnType;
  bs << fSum;
  bs << fCount;
  bs << fColWidth;

  const Catalog::ColDataType type = static_cast<Catalog::ColDataType>(fReturnType);

  if (type == Catalog::DECIMAL || type == Catalog::UDECIMAL)
  {
    // Decimals: element type follows the column width. A width that is not
    // listed writes no map at all.
    switch (fColWidth)
    {
      case 1: serializeMap<int8_t>(bs); break;
      case 2: serializeMap<int16_t>(bs); break;
      case 4: serializeMap<int32_t>(bs); break;
      case 8: serializeMap<int64_t>(bs); break;
      case 16: serializeMap<int128_t>(bs); break;
    }

    return;
  }

  switch (type)
  {
    // Signed integers
    case Catalog::TINYINT: serializeMap<int8_t>(bs); break;
    case Catalog::SMALLINT: serializeMap<int16_t>(bs); break;
    case Catalog::MEDINT:
    case Catalog::INT: serializeMap<int32_t>(bs); break;
    case Catalog::BIGINT: serializeMap<int64_t>(bs); break;

    // Unsigned integers
    case Catalog::UTINYINT: serializeMap<uint8_t>(bs); break;
    case Catalog::USMALLINT: serializeMap<uint16_t>(bs); break;
    case Catalog::UMEDINT:
    case Catalog::UINT: serializeMap<uint32_t>(bs); break;
    case Catalog::UBIGINT: serializeMap<uint64_t>(bs); break;

    // Floating point
    case Catalog::FLOAT: serializeMap<float>(bs); break;
    case Catalog::DOUBLE: serializeMap<double>(bs); break;
    case Catalog::LONGDOUBLE: serializeMap<long double>(bs); break;

    // Text
    case Catalog::CHAR:
    case Catalog::VARCHAR: serializeMap<string>(bs); break;

    default: throw std::runtime_error("ModaData::serialize with bad data type"); break;
  }
}
|
|
|
|
// Reads this object from the byte stream in the layout serialize() writes:
// the four header fields (fReturnType, fSum, fCount, fColWidth) followed by
// the occurrence map, whose element type is implied by fReturnType (and by
// fColWidth for decimals). Throws std::runtime_error for an unsupported
// return type.
void ModaData::unserialize(messageqcpp::ByteStream& bs)
{
  using Catalog = execplan::CalpontSystemCatalog;

  bs >> fReturnType;
  bs >> fSum;
  bs >> fCount;
  bs >> fColWidth;

  const Catalog::ColDataType type = static_cast<Catalog::ColDataType>(fReturnType);

  if (type == Catalog::DECIMAL || type == Catalog::UDECIMAL)
  {
    // Decimals: element type follows the column width. A width that is not
    // listed reads no map at all.
    switch (fColWidth)
    {
      case 1: unserializeMap<int8_t>(bs); break;
      case 2: unserializeMap<int16_t>(bs); break;
      case 4: unserializeMap<int32_t>(bs); break;
      case 8: unserializeMap<int64_t>(bs); break;
      case 16: unserializeMap<int128_t>(bs); break;
    }

    return;
  }

  switch (type)
  {
    // Signed integers
    case Catalog::TINYINT: unserializeMap<int8_t>(bs); break;
    case Catalog::SMALLINT: unserializeMap<int16_t>(bs); break;
    case Catalog::MEDINT:
    case Catalog::INT: unserializeMap<int32_t>(bs); break;
    case Catalog::BIGINT: unserializeMap<int64_t>(bs); break;

    // Unsigned integers
    case Catalog::UTINYINT: unserializeMap<uint8_t>(bs); break;
    case Catalog::USMALLINT: unserializeMap<uint16_t>(bs); break;
    case Catalog::UMEDINT:
    case Catalog::UINT: unserializeMap<uint32_t>(bs); break;
    case Catalog::UBIGINT: unserializeMap<uint64_t>(bs); break;

    // Floating point
    case Catalog::FLOAT: unserializeMap<float>(bs); break;
    case Catalog::DOUBLE: unserializeMap<double>(bs); break;
    case Catalog::LONGDOUBLE: unserializeMap<long double>(bs); break;

    // Text
    case Catalog::CHAR:
    case Catalog::VARCHAR: unserializeMap<string>(bs); break;

    default: throw std::runtime_error("ModaData::unserialize with bad data type"); break;
  }
}
|
|
|
|
void ModaData::cleanup()
|
|
{
|
|
if (!fMap)
|
|
return;
|
|
switch ((execplan::CalpontSystemCatalog::ColDataType)fReturnType)
|
|
{
|
|
case execplan::CalpontSystemCatalog::TINYINT:
|
|
clear<int8_t>();
|
|
deleteMap<int8_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::SMALLINT:
|
|
clear<int16_t>();
|
|
deleteMap<int16_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::MEDINT:
|
|
case execplan::CalpontSystemCatalog::INT:
|
|
clear<int32_t>();
|
|
deleteMap<int32_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::BIGINT:
|
|
clear<int64_t>();
|
|
deleteMap<int64_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::DECIMAL:
|
|
case execplan::CalpontSystemCatalog::UDECIMAL:
|
|
switch (fColWidth)
|
|
{
|
|
case 1:
|
|
clear<int8_t>();
|
|
deleteMap<int8_t>();
|
|
break;
|
|
case 2:
|
|
clear<int16_t>();
|
|
deleteMap<int16_t>();
|
|
break;
|
|
case 4:
|
|
clear<int32_t>();
|
|
deleteMap<int32_t>();
|
|
break;
|
|
case 8:
|
|
clear<int64_t>();
|
|
deleteMap<int64_t>();
|
|
break;
|
|
case 16:
|
|
clear<int128_t>();
|
|
deleteMap<int128_t>();
|
|
break;
|
|
}
|
|
break;
|
|
case execplan::CalpontSystemCatalog::UTINYINT:
|
|
clear<uint8_t>();
|
|
deleteMap<uint8_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::USMALLINT:
|
|
clear<uint16_t>();
|
|
deleteMap<uint16_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::UMEDINT:
|
|
case execplan::CalpontSystemCatalog::UINT:
|
|
clear<uint32_t>();
|
|
deleteMap<uint32_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::UBIGINT:
|
|
clear<uint64_t>();
|
|
deleteMap<uint64_t>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::FLOAT:
|
|
clear<float>();
|
|
deleteMap<float>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::DOUBLE:
|
|
clear<double>();
|
|
deleteMap<double>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::LONGDOUBLE:
|
|
clear<long double>();
|
|
deleteMap<long double>();
|
|
break;
|
|
case execplan::CalpontSystemCatalog::CHAR:
|
|
case execplan::CalpontSystemCatalog::VARCHAR:
|
|
clear<string>();
|
|
deleteMap<string>();
|
|
break;
|
|
default: throw std::runtime_error("ModaData::cleanup with bad data type"); break;
|
|
}
|
|
}
|
|
|
|
/************************************************************************************************
|
|
* String Specialization
|
|
************************************************************************************************/
|
|
|
|
// String initialization: configures the member `cs` with the charset number
// reported by the context. colTypes is not consulted.
// NOTE(review): `cs` lives on the implementation object, while the design
// notes in this file say function objects are shared between queries -
// confirm that queries using different charsets cannot interfere.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::init(mcsv1Context* context, ColumnDatum* colTypes)
{
  const auto charsetNumber = context->getCharsetNumber();
  cs.setCharset(charsetNumber);

  return mcsv1_UDAF::SUCCESS;
}
|
|
|
|
// Prepares the per-query state for a new string aggregation: copies the
// result type, column width and charset number from the context into the
// ModaData, then clears its string state via clear<string>().
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::reset(mcsv1Context* context)
{
  ModaData* state = static_cast<ModaData*>(context->getUserData());

  state->fCs_num = context->getCharsetNumber();
  state->fColWidth = context->getColWidth();
  state->fReturnType = context->getResultType();

  // Clear last, after the fields above have been recorded.
  state->clear<string>();

  return mcsv1_UDAF::SUCCESS;
}
|
|
|
|
// Accumulates one string input: increments the occurrence counter of its
// text. Empty inputs, inputs that are not compatible with strTypeId, and
// NULL strings are skipped.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
  static_any::any& input = valsIn[0].columnData;
  ModaData* state = static_cast<ModaData*>(context->getUserData());
  // Fetched before the empty check, matching the established call order.
  auto* counts = state->getMap<string>();

  if (input.empty())
  {
    // Ought not happen when UDAF_IGNORE_NULLS is on.
    return mcsv1_UDAF::SUCCESS;
  }

  // Stays default-constructed unless the input holds a compatible value.
  utils::NullString text;

  if (input.compatible(strTypeId))
  {
    text = input.cast<utils::NullString>();
  }

  if (text.isNull())
  {
    // Ought not happen when UDAF_IGNORE_NULLS is on.
    return mcsv1_UDAF::SUCCESS;
  }

  ++(*counts)[text.safeString("")];

  return mcsv1_UDAF::SUCCESS;
}
|
|
|
|
// Merges a partial string aggregation (userDataIn) into this context's
// state by adding the occurrence counters per string. A null userDataIn is
// a no-op.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
  if (userDataIn == NULL)
  {
    return mcsv1_UDAF::SUCCESS;
  }

  ModaData* target = static_cast<ModaData*>(context->getUserData());
  const ModaData* source = static_cast<const ModaData*>(userDataIn);
  auto* targetCounts = target->getMap<string>();
  auto* sourceCounts = source->getMap<string>();

  for (const auto& entry : *sourceCounts)
  {
    (*targetCounts)[entry.first] += entry.second;
  }

  return mcsv1_UDAF::SUCCESS;
}
|
|
|
|
// Produces the mode of the accumulated strings: the string with the highest
// occurrence count. Ties are broken by choosing the smallest string
// according to the collation held in `cs`. An empty map yields a
// default-constructed NullString.
//
// The tie-breaker compares each candidate against the current winner. The
// earlier code compared against a separate `lastVal` that was only refreshed
// when a strictly higher count was seen, so with three or more tied strings
// the result depended on the map's iteration order and was not necessarily
// the smallest.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::evaluate(mcsv1Context* context, static_any::any& valOut)
{
  uint64_t maxCnt = 0;
  string val;
  ModaData* data = static_cast<ModaData*>(context->getUserData());
  std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >* map = data->getMap<string>();

  if (map->empty())
  {
    valOut = utils::NullString();
    return mcsv1_UDAF::SUCCESS;
  }

  for (const auto& entry : *map)
  {
    if (entry.second > maxCnt)
    {
      val = entry.first;
      maxCnt = entry.second;
    }
    else if (entry.second == maxCnt)
    {
      // Tie breaker: choose smallest according to collation
      if (cs.strnncollsp(entry.first, val) < 0)
      {
        val = entry.first;
      }
    }
  }

  // If scale is > 0, then the original type was DECIMAL. Set the
  // ResultType to DECIMAL so the delivery logic moves the decimal point.
  // NOTE(review): this mirrors the numeric implementation; presumably the
  // scale is never > 0 for string arguments - confirm and consider removing.
  if (context->getScale() > 0)
    context->setResultType(execplan::CalpontSystemCatalog::DECIMAL);

  utils::NullString ns(val);
  valOut = ns;
  return mcsv1_UDAF::SUCCESS;
}
|
|
|
|
// Removes one previously accumulated string (the inverse of nextValue()):
// its occurrence counter is decremented and erased once it reaches zero.
//
// The input is read as a utils::NullString exactly as nextValue() does;
// empty inputs, inputs not compatible with strTypeId, and NULL strings are
// skipped. A string that is not currently counted is ignored, so the
// unsigned counter can never wrap around. fCount is left alone because the
// string nextValue() never increments it.
//
// This replaces an unconditional idbassert(0 && ...) that guarded logic
// which read the value with convertAnyTo<string> and so did not account
// for NullString.
// NOTE(review): assumes dropped values arrive in the same representation as
// values passed to nextValue() - confirm against the windowing caller.
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
  static_any::any& valDropped = valsDropped[0].columnData;
  ModaData* data = static_cast<ModaData*>(context->getUserData());
  std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >* map = data->getMap<string>();

  if (valDropped.empty())
  {
    return mcsv1_UDAF::SUCCESS;  // Ought not happen when UDAF_IGNORE_NULLS is on.
  }

  utils::NullString val;
  if (valDropped.compatible(strTypeId))
    val = valDropped.cast<utils::NullString>();

  if (val.isNull())
  {
    return mcsv1_UDAF::SUCCESS;  // Ought not happen when UDAF_IGNORE_NULLS is on.
  }

  std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >::iterator found =
      map->find(val.safeString(""));

  if (found != map->end() && found->second > 0)
  {
    if (--found->second == 0)
    {
      map->erase(found);
    }
  }

  return mcsv1_UDAF::SUCCESS;
}
|
|
|
|
|