1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-08 14:22:09 +03:00

MCOL-4171

This commit is contained in:
David Hall
2020-07-30 17:28:11 -05:00
committed by Roman Nozdrin
parent 5287e6860b
commit 638202417f
40 changed files with 807 additions and 250 deletions

View File

@@ -103,9 +103,12 @@ mcsv1_UDAF* moda::getImpl(mcsv1Context* context)
case 4:
data->modaImpl = &moda_impl_int32;
break;
default:
case 8:
data->modaImpl = &moda_impl_int64;
break;
case 16:
data->modaImpl = &moda_impl_int128;
break;
}
break;
case execplan::CalpontSystemCatalog::UTINYINT:
@@ -179,11 +182,18 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context,
{
context->setColWidth(4);
}
else
else if (colTypes[0].precision < 19)
{
context->setColWidth(8);
}
else
{
context->setColWidth(16);
}
context->setScale(colTypes[0].scale);
}
context->setPrecision(colTypes[0].precision);
mcsv1_UDAF* impl = getImpl(context);
@@ -203,10 +213,7 @@ template<class T>
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::init(mcsv1Context* context,
ColumnDatum* colTypes)
{
context->setScale(context->getScale());
context->setPrecision(19);
return mcsv1_UDAF::SUCCESS;
}
template<class T>
@@ -224,7 +231,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::nextValue(mcsv1Context* context, ColumnDa
{
static_any::any& valIn = valsIn[0].columnData;
ModaData* data = static_cast<ModaData*>(context->getUserData());
std::unordered_map<T, uint32_t>* map = data->getMap<T>();
std::unordered_map<T, uint32_t, hasher<T> >* map = data->getMap<T>();
if (valIn.empty())
{
@@ -261,9 +268,9 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::subEvaluate(mcsv1Context* context, const
ModaData* outData = static_cast<ModaData*>(context->getUserData());
const ModaData* inData = static_cast<const ModaData*>(userDataIn);
std::unordered_map<T, uint32_t>* outMap = outData->getMap<T>();
std::unordered_map<T, uint32_t>* inMap = inData->getMap<T>();
typename std::unordered_map<T, uint32_t>::const_iterator iter;
std::unordered_map<T, uint32_t, hasher<T> >* outMap = outData->getMap<T>();
std::unordered_map<T, uint32_t, hasher<T> >* inMap = inData->getMap<T>();
typename std::unordered_map<T, uint32_t, hasher<T> >::const_iterator iter;
for (iter = inMap->begin(); iter != inMap->end(); ++iter)
{
@@ -283,7 +290,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::evaluate(mcsv1Context* context, static_an
long double avg = 0;
T val = 0;
ModaData* data = static_cast<ModaData*>(context->getUserData());
std::unordered_map<T, uint32_t>* map = data->getMap<T>();
std::unordered_map<T, uint32_t, hasher<T> >* map = data->getMap<T>();
if (map->size() == 0)
{
@@ -292,7 +299,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::evaluate(mcsv1Context* context, static_an
}
avg = data->fCount ? data->fSum / data->fCount : 0;
typename std::unordered_map<T, uint32_t>::iterator iter;
typename std::unordered_map<T, uint32_t, hasher<T> >::iterator iter;
for (iter = map->begin(); iter != map->end(); ++iter)
{
@@ -303,11 +310,13 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::evaluate(mcsv1Context* context, static_an
}
else if (iter->second == maxCnt)
{
T absval = val >= 0 ? val : -val;
T absfirst = iter->first >= 0 ? iter->first : -iter->first;
// Tie breaker: choose the closest to avg. If still tie, choose smallest
long double dist1 = val > avg ? (long double)val-avg : avg-(long double)val;
long double dist2 = iter->first > avg ? (long double)iter->first-avg : avg-(long double)iter->first;
if ((dist1 > dist2)
|| ((dist1 == dist2) && (std::fabs(val) > std::fabs(iter->first))))
|| ((dist1 == dist2) && (absval > absfirst)))
{
val = iter->first;
}
@@ -328,7 +337,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::dropValue(mcsv1Context* context, ColumnDa
{
static_any::any& valDropped = valsDropped[0].columnData;
ModaData* data = static_cast<ModaData*>(context->getUserData());
std::unordered_map<T, uint32_t>* map = data->getMap<T>();
std::unordered_map<T, uint32_t, hasher<T> >* map = data->getMap<T>();
if (valDropped.empty())
{
@@ -379,9 +388,12 @@ void ModaData::serialize(messageqcpp::ByteStream& bs) const
case 4:
serializeMap<int32_t>(bs);
break;
default:
case 8:
serializeMap<int64_t>(bs);
break;
case 16:
serializeMap<int128_t>(bs);
break;
}
break;
case execplan::CalpontSystemCatalog::UTINYINT:
@@ -447,9 +459,12 @@ void ModaData::unserialize(messageqcpp::ByteStream& bs)
case 4:
unserializeMap<int32_t>(bs);
break;
default:
case 8:
unserializeMap<int64_t>(bs);
break;
case 16:
unserializeMap<int128_t>(bs);
break;
}
break;
case execplan::CalpontSystemCatalog::UTINYINT:
@@ -519,10 +534,14 @@ void ModaData::cleanup()
clear<int32_t>();
deleteMap<int32_t>();
break;
default:
case 8:
clear<int64_t>();
deleteMap<int64_t>();
break;
case 16:
clear<int128_t>();
deleteMap<int128_t>();
break;
}
break;
case execplan::CalpontSystemCatalog::UTINYINT:

View File

@@ -45,6 +45,7 @@
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
#include "hasher.h"
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
@@ -54,6 +55,38 @@
namespace mcsv1sdk
{
// Hash functor used as the Hash parameter of the std::unordered_map
// instances in this file, so that key types such as int128_t can be used.
// The hash is computed over the raw bytes of the key (sizeof(T) of them)
// by handing them to utils::Hasher.
template<class T>
struct hasher
{
    inline size_t operator()(T val) const
    {
        // val is a by-value copy, so taking its address here is safe.
        char* rawBytes = reinterpret_cast<char*>(&val);
        const size_t byteCount = sizeof(T);
        return fHasher(rawBytes, byteCount);
    }
private:
    utils::Hasher fHasher;
};
// Specialization of hasher for long double keys.
//
// The hash is computed over the bytes of the value via utils::Hasher.
// Only the bytes that carry the value are hashed: all of them when
// long double is 8 bytes wide, otherwise the first 10 (see below).
//
// Equal keys must produce equal hashes for std::unordered_map to work.
// +0.0 and -0.0 compare equal but have different byte patterns, so the
// value is normalised to +0.0 before its bytes are hashed.
template<>
struct hasher<long double>
{
    inline size_t operator()(long double val) const
    {
        // Collapse -0.0 onto +0.0 (val == 0 is true for both; NaN is
        // unaffected because it never compares equal to anything).
        if (val == 0)
        {
            val = 0;
        }

        if (sizeof(long double) == 8) // Probably just MSC, but you never know.
        {
            return fHasher((char*) &val, sizeof(long double));
        }
        else
        {
            // For Linux x86_64, long double is stored in 128 bits, but only 80 are significant
            // NOTE(review): other ABIs with a wider long double may use more than
            // 10 significant bytes; hashing a prefix is still valid there, just weaker.
            return fHasher((char*) &val, 10);
        }
    }
private:
    utils::Hasher fHasher;
};
// Override UserData for data storage
struct ModaData : public UserData
{
@@ -69,22 +102,22 @@ struct ModaData : public UserData
virtual void unserialize(messageqcpp::ByteStream& bs);
template<class T>
std::unordered_map<T, uint32_t>* getMap()
std::unordered_map<T, uint32_t, hasher<T> >* getMap()
{
if (!fMap)
{
// Just in time creation
fMap = new std::unordered_map<T, uint32_t>;
fMap = new std::unordered_map<T, uint32_t, hasher<T> >;
}
return (std::unordered_map<T, uint32_t>*) fMap;
return (std::unordered_map<T, uint32_t, hasher<T> >*) fMap;
}
// The const version is only called by serialize()
// It shouldn't (and can't) create a new map.
template<class T>
std::unordered_map<T, uint32_t>* getMap() const
std::unordered_map<T, uint32_t, hasher<T> >* getMap() const
{
return (std::unordered_map<T, uint32_t>*) fMap;
return (std::unordered_map<T, uint32_t, hasher<T> >*) fMap;
}
template<class T>
@@ -92,7 +125,7 @@ struct ModaData : public UserData
{
if (fMap)
{
delete (std::unordered_map<T, uint32_t>*) fMap;
delete (std::unordered_map<T, uint32_t, hasher<T> >*) fMap;
fMap = NULL;
}
}
@@ -123,10 +156,10 @@ private:
template<class T>
void serializeMap(messageqcpp::ByteStream& bs) const
{
std::unordered_map<T, uint32_t>* map = getMap<T>();
std::unordered_map<T, uint32_t, hasher<T> >* map = getMap<T>();
if (map)
{
typename std::unordered_map<T, uint32_t>::const_iterator iter;
typename std::unordered_map<T, uint32_t, hasher<T> >::const_iterator iter;
bs << (uint64_t)map->size();
for (iter = map->begin(); iter != map->end(); ++iter)
{
@@ -147,7 +180,7 @@ private:
T num;
uint64_t sz;
bs >> sz;
std::unordered_map<T, uint32_t>* map = getMap<T>();
std::unordered_map<T, uint32_t, hasher<T> >* map = getMap<T>();
map->clear();
for (uint64_t i = 0; i < sz; ++i)
{
@@ -234,6 +267,7 @@ protected:
Moda_impl_T<int16_t> moda_impl_int16;
Moda_impl_T<int32_t> moda_impl_int32;
Moda_impl_T<int64_t> moda_impl_int64;
Moda_impl_T<int128_t> moda_impl_int128;
Moda_impl_T<uint8_t> moda_impl_uint8;
Moda_impl_T<uint16_t> moda_impl_uint16;
Moda_impl_T<uint32_t> moda_impl_uint32;