You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-3503 add MODA aggregate function
This commit is contained in:
236
utils/regr/moda.h
Normal file
236
utils/regr/moda.h
Normal file
@ -0,0 +1,236 @@
|
||||
/* Copyright (C) 2019 MariaDB Corporation
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; version 2 of
|
||||
the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
/***********************************************************************
|
||||
* $Id$
|
||||
*
|
||||
* moda.h
|
||||
***********************************************************************/
|
||||
|
||||
/**
|
||||
* Columnstore interface for the moda User Defined Aggregate
|
||||
* Functions (UDAF) and User Defined Analytic Functions (UDAnF).
|
||||
*
|
||||
* To notify mysqld about the new function:
|
||||
*
|
||||
* CREATE AGGREGATE FUNCTION moda returns STRING soname 'libregr_mysql.so';
|
||||
*
|
||||
* moda returns the value with the greatest number of occurances in
|
||||
* the dataset with ties being broken by:
|
||||
* 1) closest to AVG
|
||||
* 2) smallest value
|
||||
*/
|
||||
#ifndef HEADER_moda
|
||||
#define HEADER_moda
|
||||
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "mcsv1_udaf.h"
|
||||
#include "calpontsystemcatalog.h"
|
||||
#include "windowfunctioncolumn.h"
|
||||
|
||||
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
|
||||
#define EXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define EXPORT
|
||||
#endif
|
||||
|
||||
namespace mcsv1sdk
|
||||
{
|
||||
// Override UserData for data storage
|
||||
struct ModaData : public UserData
|
||||
{
|
||||
ModaData() : fMap(NULL),
|
||||
fReturnType((uint32_t)execplan::CalpontSystemCatalog::UNDEFINED),
|
||||
fColWidth(0),
|
||||
modaImpl(NULL)
|
||||
{};
|
||||
|
||||
virtual ~ModaData() {}
|
||||
|
||||
virtual void serialize(messageqcpp::ByteStream& bs) const;
|
||||
virtual void unserialize(messageqcpp::ByteStream& bs);
|
||||
|
||||
template<class T>
|
||||
std::unordered_map<T, uint32_t>* getMap()
|
||||
{
|
||||
if (!fMap)
|
||||
{
|
||||
// Just in time creation
|
||||
fMap = new std::unordered_map<T, uint32_t>;
|
||||
}
|
||||
return (std::unordered_map<T, uint32_t>*) fMap;
|
||||
}
|
||||
|
||||
// The const version is only called by serialize()
|
||||
// It shouldn't (and can't) create a new map.
|
||||
template<class T>
|
||||
std::unordered_map<T, uint32_t>* getMap() const
|
||||
{
|
||||
if (!fMap)
|
||||
{
|
||||
throw std::runtime_error("ModaData::serialize with no map");
|
||||
}
|
||||
return (std::unordered_map<T, uint32_t>*) fMap;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void clear()
|
||||
{
|
||||
fSum = 0.0;
|
||||
fCount = 0;
|
||||
getMap<T>()->clear();
|
||||
}
|
||||
|
||||
long double fSum;
|
||||
uint64_t fCount;
|
||||
void* fMap; // Will be of type unordered_map<>
|
||||
uint32_t fReturnType;
|
||||
uint32_t fColWidth;
|
||||
mcsv1_UDAF* modaImpl; // A pointer to one of the Moda_impl_T concrete classes
|
||||
|
||||
private:
|
||||
// For now, copy construction is unwanted
|
||||
ModaData(UserData&);
|
||||
|
||||
// Templated map streamers
|
||||
template<class T>
|
||||
void serializeMap(messageqcpp::ByteStream& bs) const
|
||||
{
|
||||
std::unordered_map<T, uint32_t>* map = getMap<T>();
|
||||
typename std::unordered_map<T, uint32_t>::const_iterator iter;
|
||||
bs << (uint64_t)map->size();
|
||||
for (iter = map->begin(); iter != map->end(); ++iter)
|
||||
{
|
||||
bs << iter->first;
|
||||
bs << iter->second;
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void unserializeMap(messageqcpp::ByteStream& bs)
|
||||
{
|
||||
uint32_t cnt;
|
||||
T num;
|
||||
uint64_t sz;
|
||||
bs >> sz;
|
||||
std::unordered_map<T, uint32_t>* map = getMap<T>();
|
||||
map->clear();
|
||||
for (uint64_t i = 0; i < sz; ++i)
|
||||
{
|
||||
bs >> num;
|
||||
bs >> cnt;
|
||||
(*map)[num] = cnt;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class T>
|
||||
class Moda_impl_T : public mcsv1_UDAF
|
||||
{
|
||||
public:
|
||||
// Defaults OK
|
||||
Moda_impl_T() {};
|
||||
virtual ~Moda_impl_T() {};
|
||||
|
||||
virtual mcsv1_UDAF::ReturnCode init(mcsv1Context* context,
|
||||
ColumnDatum* colTypes);
|
||||
|
||||
virtual mcsv1_UDAF::ReturnCode reset(mcsv1Context* context);
|
||||
virtual mcsv1_UDAF::ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
|
||||
virtual mcsv1_UDAF::ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
|
||||
virtual mcsv1_UDAF::ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
|
||||
virtual mcsv1_UDAF::ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
|
||||
|
||||
// Dummy: not used
|
||||
virtual mcsv1_UDAF::ReturnCode createUserData(UserData*& userData, int32_t& length)
|
||||
{
|
||||
return mcsv1_UDAF::SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
// moda returns the modal value of the dataset. If more than one value
|
||||
// have the same maximum number of occurances, then the one closest to
|
||||
// AVG wins. If two are the same distance from AVG, then the smaller wins.
|
||||
class moda : public mcsv1_UDAF
|
||||
{
|
||||
public:
|
||||
// Defaults OK
|
||||
moda() : mcsv1_UDAF() {};
|
||||
virtual ~moda() {};
|
||||
|
||||
virtual mcsv1_UDAF::ReturnCode init(mcsv1Context* context,
|
||||
ColumnDatum* colTypes);
|
||||
|
||||
virtual ReturnCode reset(mcsv1Context* context)
|
||||
{
|
||||
return getImpl(context)->reset(context);
|
||||
}
|
||||
|
||||
virtual mcsv1_UDAF::ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn)
|
||||
{
|
||||
return getImpl(context)->nextValue(context, valsIn);
|
||||
}
|
||||
|
||||
virtual mcsv1_UDAF::ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn)
|
||||
{
|
||||
return getImpl(context)->subEvaluate(context, valIn);
|
||||
}
|
||||
|
||||
virtual mcsv1_UDAF::ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut)
|
||||
{
|
||||
return getImpl(context)->evaluate(context, valOut);
|
||||
}
|
||||
|
||||
virtual mcsv1_UDAF::ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
|
||||
{
|
||||
return getImpl(context)->dropValue(context, valsDropped);
|
||||
}
|
||||
|
||||
mcsv1_UDAF::ReturnCode createUserData(UserData*& userData, int32_t& length)
|
||||
{
|
||||
userData = new ModaData;
|
||||
length = sizeof(ModaData);
|
||||
return mcsv1_UDAF::SUCCESS;
|
||||
}
|
||||
|
||||
mcsv1_UDAF* getImpl(mcsv1Context* context);
|
||||
|
||||
protected:
|
||||
Moda_impl_T<int8_t> moda_impl_int8;
|
||||
Moda_impl_T<int16_t> moda_impl_int16;
|
||||
Moda_impl_T<int32_t> moda_impl_int32;
|
||||
Moda_impl_T<int64_t> moda_impl_int64;
|
||||
Moda_impl_T<uint8_t> moda_impl_uint8;
|
||||
Moda_impl_T<uint16_t> moda_impl_uint16;
|
||||
Moda_impl_T<uint32_t> moda_impl_uint32;
|
||||
Moda_impl_T<uint64_t> moda_impl_uint64;
|
||||
Moda_impl_T<float> moda_impl_float;
|
||||
Moda_impl_T<double> moda_impl_double;
|
||||
Moda_impl_T<long double> moda_impl_longdouble;
|
||||
};
|
||||
|
||||
|
||||
}; // namespace
|
||||
|
||||
#undef EXPORT
|
||||
|
||||
#endif // HEADER_mode.h
|
||||
|
Reference in New Issue
Block a user