1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

Mcol 5092 MODA uses wrong column width for some types (#2450)

* MCOL-5092 Ensure column width is correct for datatype
                       Change MODA return type to STRING
                       Modify MODA to handle every numeric type
* MCOL-5162 MODA to support char and varchar with collation support

Fixes to the aggregate bit functions
When we fixed the storage sign issue for MCOL-5092, it uncovered a problem in the bit aggregates (bit_and, bit_or and bit_xor). These aggregates should always return UBIGINT, but they relied on the type of the argument column, which gave bad results.
This commit is contained in:
David.Hall
2022-08-11 15:16:11 -05:00
committed by GitHub
parent c906172bf5
commit 2020f35e88
16 changed files with 594 additions and 111 deletions

View File

@ -102,6 +102,12 @@ mcsv1_UDAF* moda::getImpl(mcsv1Context* context)
case execplan::CalpontSystemCatalog::FLOAT: data->modaImpl = &moda_impl_float; break;
case execplan::CalpontSystemCatalog::DOUBLE: data->modaImpl = &moda_impl_double; break;
case execplan::CalpontSystemCatalog::LONGDOUBLE: data->modaImpl = &moda_impl_longdouble; break;
case execplan::CalpontSystemCatalog::VARCHAR:
case execplan::CalpontSystemCatalog::CHAR:
data->modaImpl = &moda_impl_string;
break;
default: data->modaImpl = NULL;
}
return data->modaImpl;
@ -125,14 +131,16 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes)
if (!(datatypes::isNumeric(colTypes[0].dataType)))
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
context->setErrorMessage("moda() with non-numeric argument");
return mcsv1_UDAF::ERROR;
if (colTypes[0].dataType != datatypes::SystemCatalog::VARCHAR &&
colTypes[0].dataType != datatypes::SystemCatalog::CHAR)
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
context->setErrorMessage("moda() with invalid argument");
return mcsv1_UDAF::ERROR;
}
}
context->setResultType(colTypes[0].dataType);
if (colTypes[0].dataType == execplan::CalpontSystemCatalog::DECIMAL ||
colTypes[0].dataType == execplan::CalpontSystemCatalog::UDECIMAL)
{
@ -158,8 +166,10 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes)
}
context->setScale(colTypes[0].scale);
context->setPrecision(colTypes[0].precision);
}
context->setPrecision(colTypes[0].precision);
context->setResultType(colTypes[0].dataType);
mcsv1_UDAF* impl = getImpl(context);
@ -167,7 +177,7 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes)
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
context->setErrorMessage("moda() with non-numeric argument");
context->setErrorMessage("moda() with implementation not found for data type");
return mcsv1_UDAF::ERROR;
}
@ -178,6 +188,13 @@ mcsv1_UDAF::ReturnCode moda::init(mcsv1Context* context, ColumnDatum* colTypes)
template <class T>
mcsv1_UDAF::ReturnCode Moda_impl_T<T>::init(mcsv1Context* context, ColumnDatum* colTypes)
{
if (!(colTypes[0].dataType == execplan::CalpontSystemCatalog::DECIMAL ||
colTypes[0].dataType == execplan::CalpontSystemCatalog::UDECIMAL))
{
context->setColWidth(sizeof(T));
context->setScale(0);
context->setPrecision(0);
}
return mcsv1_UDAF::SUCCESS;
}
@ -196,7 +213,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::nextValue(mcsv1Context* context, ColumnDa
{
static_any::any& valIn = valsIn[0].columnData;
ModaData* data = static_cast<ModaData*>(context->getUserData());
std::unordered_map<T, uint32_t, hasher<T> >* map = data->getMap<T>();
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = data->getMap<T>();
if (valIn.empty())
{
@ -233,9 +250,9 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::subEvaluate(mcsv1Context* context, const
ModaData* outData = static_cast<ModaData*>(context->getUserData());
const ModaData* inData = static_cast<const ModaData*>(userDataIn);
std::unordered_map<T, uint32_t, hasher<T> >* outMap = outData->getMap<T>();
std::unordered_map<T, uint32_t, hasher<T> >* inMap = inData->getMap<T>();
typename std::unordered_map<T, uint32_t, hasher<T> >::const_iterator iter;
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* outMap = outData->getMap<T>();
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* inMap = inData->getMap<T>();
typename std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >::const_iterator iter;
for (iter = inMap->begin(); iter != inMap->end(); ++iter)
{
@ -255,7 +272,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::evaluate(mcsv1Context* context, static_an
long double avg = 0;
T val = 0;
ModaData* data = static_cast<ModaData*>(context->getUserData());
std::unordered_map<T, uint32_t, hasher<T> >* map = data->getMap<T>();
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = data->getMap<T>();
if (map->size() == 0)
{
@ -264,7 +281,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::evaluate(mcsv1Context* context, static_an
}
avg = data->fCount ? data->fSum / data->fCount : 0;
typename std::unordered_map<T, uint32_t, hasher<T> >::iterator iter;
typename std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >::iterator iter;
for (iter = map->begin(); iter != map->end(); ++iter)
{
@ -301,7 +318,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::dropValue(mcsv1Context* context, ColumnDa
{
static_any::any& valDropped = valsDropped[0].columnData;
ModaData* data = static_cast<ModaData*>(context->getUserData());
std::unordered_map<T, uint32_t, hasher<T> >* map = data->getMap<T>();
std::unordered_map<T, uint32_t, hasher<T>, comparator<T> >* map = data->getMap<T>();
if (valDropped.empty())
{
@ -350,6 +367,9 @@ void ModaData::serialize(messageqcpp::ByteStream& bs) const
case execplan::CalpontSystemCatalog::FLOAT: serializeMap<float>(bs); break;
case execplan::CalpontSystemCatalog::DOUBLE: serializeMap<double>(bs); break;
case execplan::CalpontSystemCatalog::LONGDOUBLE: serializeMap<long double>(bs); break;
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::VARCHAR:
serializeMap<string>(bs); break;
default: throw std::runtime_error("ModaData::serialize with bad data type"); break;
}
}
@ -387,6 +407,9 @@ void ModaData::unserialize(messageqcpp::ByteStream& bs)
case execplan::CalpontSystemCatalog::FLOAT: unserializeMap<float>(bs); break;
case execplan::CalpontSystemCatalog::DOUBLE: unserializeMap<double>(bs); break;
case execplan::CalpontSystemCatalog::LONGDOUBLE: unserializeMap<long double>(bs); break;
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::VARCHAR:
unserializeMap<string>(bs); break;
default: throw std::runtime_error("ModaData::unserialize with bad data type"); break;
}
}
@ -469,6 +492,136 @@ void ModaData::cleanup()
clear<long double>();
deleteMap<long double>();
break;
default: throw std::runtime_error("ModaData::unserialize with bad data type"); break;
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::VARCHAR:
clear<string>();
deleteMap<string>();
break;
default: throw std::runtime_error("ModaData::cleanup with bad data type"); break;
}
}
/************************************************************************************************
* String Specialization
************************************************************************************************/
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::init(mcsv1Context* context, ColumnDatum* colTypes)
{
cs.setCharset(context->getCharsetNumber());
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::reset(mcsv1Context* context)
{
ModaData* data = static_cast<ModaData*>(context->getUserData());
data->fReturnType = context->getResultType();
data->fColWidth = context->getColWidth();
data->fCs_num = context->getCharsetNumber();
data->clear<string>();
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
static_any::any& valIn = valsIn[0].columnData;
ModaData* data = static_cast<ModaData*>(context->getUserData());
std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >* map = data->getMap<string>();
if (valIn.empty())
{
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
}
string val;
if (valIn.compatible(strTypeId))
val = valIn.cast<string>();
(*map)[val]++;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
if (!userDataIn)
{
return mcsv1_UDAF::SUCCESS;
}
ModaData* outData = static_cast<ModaData*>(context->getUserData());
const ModaData* inData = static_cast<const ModaData*>(userDataIn);
std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >* outMap = outData->getMap<string>();
std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >* inMap = inData->getMap<string>();
typename std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >::const_iterator iter;
for (iter = inMap->begin(); iter != inMap->end(); ++iter)
{
(*outMap)[iter->first] += iter->second;
}
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::evaluate(mcsv1Context* context, static_any::any& valOut)
{
uint64_t maxCnt = 0;
string val;
string lastVal;
ModaData* data = static_cast<ModaData*>(context->getUserData());
std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >* map = data->getMap<string>();
if (map->size() == 0)
{
valOut = string();
return mcsv1_UDAF::SUCCESS;
}
typename std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >::iterator iter;
for (iter = map->begin(); iter != map->end(); ++iter)
{
if (iter->second > maxCnt)
{
val = iter->first;
lastVal = val;
maxCnt = iter->second;
}
else if (iter->second == maxCnt)
{
// Tie breaker: choose smallest according to collation
if (cs.strnncollsp(val, lastVal) < 0)
{
val = iter->first;
}
}
}
// If scale is > 0, then the original type was DECIMAL. Set the
// ResultType to DECIMAL so the delivery logic moves the decimal point.
if (context->getScale() > 0)
context->setResultType(execplan::CalpontSystemCatalog::DECIMAL);
valOut = val;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode Moda_impl_T<string>::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
static_any::any& valDropped = valsDropped[0].columnData;
ModaData* data = static_cast<ModaData*>(context->getUserData());
std::unordered_map<string, uint32_t, hasher<string>, comparator<string> >* map = data->getMap<string>();
if (valDropped.empty())
{
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
}
string val = convertAnyTo<string>(valDropped);
--data->fCount;
(*map)[val]--;
return mcsv1_UDAF::SUCCESS;
}