You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-08 14:22:09 +03:00
MCOL-4171
This commit is contained in:
committed by
Roman Nozdrin
parent
5287e6860b
commit
638202417f
@@ -103,9 +103,12 @@ mcsv1_UDAF* moda::getImpl(mcsv1Context* context)
|
||||
case 4:
|
||||
data->modaImpl = &moda_impl_int32;
|
||||
break;
|
||||
default:
|
||||
case 8:
|
||||
data->modaImpl = &moda_impl_int64;
|
||||
break;
|
||||
case 16:
|
||||
data->modaImpl = &moda_impl_int128;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case execplan::CalpontSystemCatalog::UTINYINT:
|
||||
@@ -179,11 +182,18 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context,
|
||||
{
|
||||
context->setColWidth(4);
|
||||
}
|
||||
else
|
||||
else if (colTypes[0].precision < 19)
|
||||
{
|
||||
context->setColWidth(8);
|
||||
}
|
||||
else
|
||||
{
|
||||
context->setColWidth(16);
|
||||
}
|
||||
|
||||
context->setScale(colTypes[0].scale);
|
||||
}
|
||||
context->setPrecision(colTypes[0].precision);
|
||||
|
||||
mcsv1_UDAF* impl = getImpl(context);
|
||||
|
||||
@@ -203,10 +213,7 @@ template<class T>
|
||||
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::init(mcsv1Context* context,
|
||||
ColumnDatum* colTypes)
|
||||
{
|
||||
context->setScale(context->getScale());
|
||||
context->setPrecision(19);
|
||||
return mcsv1_UDAF::SUCCESS;
|
||||
|
||||
}
|
||||
|
||||
template<class T>
|
||||
@@ -224,7 +231,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::nextValue(mcsv1Context* context, ColumnDa
|
||||
{
|
||||
static_any::any& valIn = valsIn[0].columnData;
|
||||
ModaData* data = static_cast<ModaData*>(context->getUserData());
|
||||
std::unordered_map<T, uint32_t>* map = data->getMap<T>();
|
||||
std::unordered_map<T, uint32_t, hasher<T> >* map = data->getMap<T>();
|
||||
|
||||
if (valIn.empty())
|
||||
{
|
||||
@@ -261,9 +268,9 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::subEvaluate(mcsv1Context* context, const
|
||||
|
||||
ModaData* outData = static_cast<ModaData*>(context->getUserData());
|
||||
const ModaData* inData = static_cast<const ModaData*>(userDataIn);
|
||||
std::unordered_map<T, uint32_t>* outMap = outData->getMap<T>();
|
||||
std::unordered_map<T, uint32_t>* inMap = inData->getMap<T>();
|
||||
typename std::unordered_map<T, uint32_t>::const_iterator iter;
|
||||
std::unordered_map<T, uint32_t, hasher<T> >* outMap = outData->getMap<T>();
|
||||
std::unordered_map<T, uint32_t, hasher<T> >* inMap = inData->getMap<T>();
|
||||
typename std::unordered_map<T, uint32_t, hasher<T> >::const_iterator iter;
|
||||
|
||||
for (iter = inMap->begin(); iter != inMap->end(); ++iter)
|
||||
{
|
||||
@@ -283,7 +290,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::evaluate(mcsv1Context* context, static_an
|
||||
long double avg = 0;
|
||||
T val = 0;
|
||||
ModaData* data = static_cast<ModaData*>(context->getUserData());
|
||||
std::unordered_map<T, uint32_t>* map = data->getMap<T>();
|
||||
std::unordered_map<T, uint32_t, hasher<T> >* map = data->getMap<T>();
|
||||
|
||||
if (map->size() == 0)
|
||||
{
|
||||
@@ -292,7 +299,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::evaluate(mcsv1Context* context, static_an
|
||||
}
|
||||
|
||||
avg = data->fCount ? data->fSum / data->fCount : 0;
|
||||
typename std::unordered_map<T, uint32_t>::iterator iter;
|
||||
typename std::unordered_map<T, uint32_t, hasher<T> >::iterator iter;
|
||||
|
||||
for (iter = map->begin(); iter != map->end(); ++iter)
|
||||
{
|
||||
@@ -303,11 +310,13 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::evaluate(mcsv1Context* context, static_an
|
||||
}
|
||||
else if (iter->second == maxCnt)
|
||||
{
|
||||
T absval = val >= 0 ? val : -val;
|
||||
T absfirst = iter->first >= 0 ? iter->first : -iter->first;
|
||||
// Tie breaker: choose the closest to avg. If still tie, choose smallest
|
||||
long double dist1 = val > avg ? (long double)val-avg : avg-(long double)val;
|
||||
long double dist2 = iter->first > avg ? (long double)iter->first-avg : avg-(long double)iter->first;
|
||||
if ((dist1 > dist2)
|
||||
|| ((dist1 == dist2) && (std::fabs(val) > std::fabs(iter->first))))
|
||||
|| ((dist1 == dist2) && (absval > absfirst)))
|
||||
{
|
||||
val = iter->first;
|
||||
}
|
||||
@@ -328,7 +337,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::dropValue(mcsv1Context* context, ColumnDa
|
||||
{
|
||||
static_any::any& valDropped = valsDropped[0].columnData;
|
||||
ModaData* data = static_cast<ModaData*>(context->getUserData());
|
||||
std::unordered_map<T, uint32_t>* map = data->getMap<T>();
|
||||
std::unordered_map<T, uint32_t, hasher<T> >* map = data->getMap<T>();
|
||||
|
||||
if (valDropped.empty())
|
||||
{
|
||||
@@ -379,9 +388,12 @@ void ModaData::serialize(messageqcpp::ByteStream& bs) const
|
||||
case 4:
|
||||
serializeMap<int32_t>(bs);
|
||||
break;
|
||||
default:
|
||||
case 8:
|
||||
serializeMap<int64_t>(bs);
|
||||
break;
|
||||
case 16:
|
||||
serializeMap<int128_t>(bs);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case execplan::CalpontSystemCatalog::UTINYINT:
|
||||
@@ -447,9 +459,12 @@ void ModaData::unserialize(messageqcpp::ByteStream& bs)
|
||||
case 4:
|
||||
unserializeMap<int32_t>(bs);
|
||||
break;
|
||||
default:
|
||||
case 8:
|
||||
unserializeMap<int64_t>(bs);
|
||||
break;
|
||||
case 16:
|
||||
unserializeMap<int128_t>(bs);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case execplan::CalpontSystemCatalog::UTINYINT:
|
||||
@@ -519,10 +534,14 @@ void ModaData::cleanup()
|
||||
clear<int32_t>();
|
||||
deleteMap<int32_t>();
|
||||
break;
|
||||
default:
|
||||
case 8:
|
||||
clear<int64_t>();
|
||||
deleteMap<int64_t>();
|
||||
break;
|
||||
case 16:
|
||||
clear<int128_t>();
|
||||
deleteMap<int128_t>();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case execplan::CalpontSystemCatalog::UTINYINT:
|
||||
|
@@ -45,6 +45,7 @@
|
||||
#include "mcsv1_udaf.h"
|
||||
#include "calpontsystemcatalog.h"
|
||||
#include "windowfunctioncolumn.h"
|
||||
#include "hasher.h"
|
||||
|
||||
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
|
||||
#define EXPORT __declspec(dllexport)
|
||||
@@ -54,6 +55,38 @@
|
||||
|
||||
namespace mcsv1sdk
|
||||
{
|
||||
// A hasher that handles int128_t
|
||||
template<class T>
|
||||
struct hasher
|
||||
{
|
||||
inline size_t operator()(T val) const
|
||||
{
|
||||
return fHasher((char*) &val, sizeof(T));
|
||||
}
|
||||
|
||||
private:
|
||||
utils::Hasher fHasher;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct hasher<long double>
|
||||
{
|
||||
inline size_t operator()(long double val) const
|
||||
{
|
||||
if (sizeof(long double) == 8) // Probably just MSC, but you never know.
|
||||
{
|
||||
return fHasher((char*) &val, sizeof(long double));
|
||||
}
|
||||
else
|
||||
{
|
||||
// For Linux x86_64, long double is stored in 128 bits, but only 80 are significant
|
||||
return fHasher((char*) &val, 10);
|
||||
}
|
||||
}
|
||||
private:
|
||||
utils::Hasher fHasher;
|
||||
};
|
||||
|
||||
// Override UserData for data storage
|
||||
struct ModaData : public UserData
|
||||
{
|
||||
@@ -69,22 +102,22 @@ struct ModaData : public UserData
|
||||
virtual void unserialize(messageqcpp::ByteStream& bs);
|
||||
|
||||
template<class T>
|
||||
std::unordered_map<T, uint32_t>* getMap()
|
||||
std::unordered_map<T, uint32_t, hasher<T> >* getMap()
|
||||
{
|
||||
if (!fMap)
|
||||
{
|
||||
// Just in time creation
|
||||
fMap = new std::unordered_map<T, uint32_t>;
|
||||
fMap = new std::unordered_map<T, uint32_t, hasher<T> >;
|
||||
}
|
||||
return (std::unordered_map<T, uint32_t>*) fMap;
|
||||
return (std::unordered_map<T, uint32_t, hasher<T> >*) fMap;
|
||||
}
|
||||
|
||||
// The const version is called through a const ModaData: by serializeMap() and by subEvaluate() on its input data.
|
||||
// It shouldn't (and can't) create a new map.
|
||||
template<class T>
|
||||
std::unordered_map<T, uint32_t>* getMap() const
|
||||
std::unordered_map<T, uint32_t, hasher<T> >* getMap() const
|
||||
{
|
||||
return (std::unordered_map<T, uint32_t>*) fMap;
|
||||
return (std::unordered_map<T, uint32_t, hasher<T> >*) fMap;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
@@ -92,7 +125,7 @@ struct ModaData : public UserData
|
||||
{
|
||||
if (fMap)
|
||||
{
|
||||
delete (std::unordered_map<T, uint32_t>*) fMap;
|
||||
delete (std::unordered_map<T, uint32_t, hasher<T> >*) fMap;
|
||||
fMap = NULL;
|
||||
}
|
||||
}
|
||||
@@ -123,10 +156,10 @@ private:
|
||||
template<class T>
|
||||
void serializeMap(messageqcpp::ByteStream& bs) const
|
||||
{
|
||||
std::unordered_map<T, uint32_t>* map = getMap<T>();
|
||||
std::unordered_map<T, uint32_t, hasher<T> >* map = getMap<T>();
|
||||
if (map)
|
||||
{
|
||||
typename std::unordered_map<T, uint32_t>::const_iterator iter;
|
||||
typename std::unordered_map<T, uint32_t, hasher<T> >::const_iterator iter;
|
||||
bs << (uint64_t)map->size();
|
||||
for (iter = map->begin(); iter != map->end(); ++iter)
|
||||
{
|
||||
@@ -147,7 +180,7 @@ private:
|
||||
T num;
|
||||
uint64_t sz;
|
||||
bs >> sz;
|
||||
std::unordered_map<T, uint32_t>* map = getMap<T>();
|
||||
std::unordered_map<T, uint32_t, hasher<T> >* map = getMap<T>();
|
||||
map->clear();
|
||||
for (uint64_t i = 0; i < sz; ++i)
|
||||
{
|
||||
@@ -234,6 +267,7 @@ protected:
|
||||
Moda_impl_T<int16_t> moda_impl_int16;
|
||||
Moda_impl_T<int32_t> moda_impl_int32;
|
||||
Moda_impl_T<int64_t> moda_impl_int64;
|
||||
Moda_impl_T<int128_t> moda_impl_int128;
|
||||
Moda_impl_T<uint8_t> moda_impl_uint8;
|
||||
Moda_impl_T<uint16_t> moda_impl_uint16;
|
||||
Moda_impl_T<uint32_t> moda_impl_uint32;
|
||||
|
Reference in New Issue
Block a user