1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-1698 get DISTINCT working for UDAnF

This commit is contained in:
David Hall
2018-09-11 12:02:05 -05:00
parent 1b4063666b
commit 70cec8f484
4 changed files with 70 additions and 100 deletions

View File

@ -58,7 +58,7 @@ UDAF_MAP& UDAFMap::getMap()
// The function names passed to the interface are always in lower case.
fm["allnull"] = new allnull();
fm["ssq"] = new ssq();
fm["median"] = new median();
// fm["median"] = new median();
fm["avg_mode"] = new avg_mode();
fm["avgx"] = new avgx();

View File

@ -189,6 +189,7 @@ static uint64_t UDAF_WINDOWFRAME_REQUIRED __attribute__ ((unused)) = 1 << 4; //
static uint64_t UDAF_WINDOWFRAME_ALLOWED __attribute__ ((unused)) = 1 << 5; // If used as UDAnF, a WINDOW FRAME is optional
static uint64_t UDAF_MAYBE_NULL __attribute__ ((unused)) = 1 << 6; // If UDA(n)F might return NULL.
static uint64_t UDAF_IGNORE_NULLS __attribute__ ((unused)) = 1 << 7; // If UDA(n)F wants NULL rows suppressed.
static uint64_t UDAF_DISTINCT __attribute__ ((unused)) = 1 << 8; // Force UDA(n)F to be distinct on first param.
// Flags set by the framework to define the context of the call.
// User code shouldn't use these directly

View File

@ -82,7 +82,7 @@ WindowFunctionType* WF_udaf::clone() const
// Returns this window function to a clean state: asks the UDAF obtained
// from the context to reset itself, empties the containers used to track
// values already seen for DISTINCT, and then lets the base class reset
// its own data.
void WF_udaf::resetData()
{
getContext().getFunction()->reset(&getContext());
fDistinctSet.clear();
fDistinctMap.clear();
WindowFunctionType::resetData();
}
@ -179,8 +179,6 @@ bool WF_udaf::dropValues(int64_t b, int64_t e)
case CalpontSystemCatalog::MEDINT:
case CalpontSystemCatalog::INT:
case CalpontSystemCatalog::BIGINT:
case CalpontSystemCatalog::DATE:
case CalpontSystemCatalog::DATETIME:
{
int64_t valIn;
@ -270,6 +268,9 @@ bool WF_udaf::dropValues(int64_t b, int64_t e)
case CalpontSystemCatalog::UMEDINT:
case CalpontSystemCatalog::UINT:
case CalpontSystemCatalog::UBIGINT:
case CalpontSystemCatalog::TIME:
case CalpontSystemCatalog::DATE:
case CalpontSystemCatalog::DATETIME:
{
uint64_t valIn;
@ -622,8 +623,6 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut,
case execplan::CalpontSystemCatalog::BIGINT:
case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL:
case execplan::CalpontSystemCatalog::DATE:
case execplan::CalpontSystemCatalog::DATETIME:
if (valOut.empty())
{
setValue(colDataType, b, e, c, (int64_t*)NULL);
@ -795,8 +794,6 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c)
case CalpontSystemCatalog::MEDINT:
case CalpontSystemCatalog::INT:
case CalpontSystemCatalog::BIGINT:
case CalpontSystemCatalog::DATE:
case CalpontSystemCatalog::DATETIME:
{
int64_t valIn;
@ -811,29 +808,23 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c)
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
if (k == 0 && fDistinct)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
// MCOL-1698
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
// unordered_map will not insert a duplicate key (valIn).
// insert() returns a pair: distinct.first is an iterator to the
// element holding that key (new or pre-existing), and distinct.second
// is a bool -- true if newly inserted, false if a duplicate.
std::pair<typename DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
// MCOL-1698
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
// unordered_map will not insert a duplicate key (valIn).
// insert() returns a pair: distinct.first is an iterator to the
// element holding that key (new or pre-existing), and distinct.second
// is a bool -- true if newly inserted, false if a duplicate.
std::pair<typename DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
// This is a duplicate: increment the count
++(*distinct.first).second;
bSkipIt = true;
continue;
}
// This is a duplicate: increment the count
++(*distinct.first).second;
bSkipIt = true;
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
@ -856,23 +847,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c)
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
if (k == 0 && fDistinct)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<typename DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<typename DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
++(*distinct.first).second;
bSkipIt = true;
continue;
}
++(*distinct.first).second;
bSkipIt = true;
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
@ -884,6 +869,9 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c)
case CalpontSystemCatalog::UMEDINT:
case CalpontSystemCatalog::UINT:
case CalpontSystemCatalog::UBIGINT:
case CalpontSystemCatalog::TIME:
case CalpontSystemCatalog::DATE:
case CalpontSystemCatalog::DATETIME:
{
uint64_t valIn;
@ -898,23 +886,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c)
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
if (k == 0 && fDistinct)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<typename DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<typename DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
++(*distinct.first).second;
bSkipIt = true;
continue;
}
++(*distinct.first).second;
bSkipIt = true;
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
@ -937,23 +919,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c)
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
if (k == 0 && fDistinct)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<typename DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<typename DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
++(*distinct.first).second;
bSkipIt = true;
continue;
}
++(*distinct.first).second;
bSkipIt = true;
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
@ -976,23 +952,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c)
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
if (k == 0 && fDistinct)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<typename DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<typename DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
++(*distinct.first).second;
bSkipIt = true;
continue;
}
++(*distinct.first).second;
bSkipIt = true;
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
@ -1018,23 +988,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c)
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
if (k == 0 && fDistinct)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<typename DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<typename DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
++(*distinct.first).second;
bSkipIt = true;
continue;
}
++(*distinct.first).second;
bSkipIt = true;
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;

View File

@ -22,9 +22,9 @@
#define UTILS_WF_UDAF_H
#ifndef _MSC_VER
#include <tr1/unordered_set>
#include <tr1/unordered_map>
#else
#include <unordered_set>
#include <unordered_map>
#endif
#include "windowfunctiontype.h"
#include "mcsv1_udaf.h"
@ -83,6 +83,11 @@ public:
return fDistinct;
}
// Stores the DISTINCT flag in fDistinct. Calling with no argument
// turns it on; pass false to turn it off.
void setDistinct(bool d = true)
{
fDistinct = d;
}
protected:
void SetUDAFValue(static_any::any& valOut, int64_t colOut, int64_t b, int64_t e, int64_t c);