1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-1201 manual rebase with develop. Obsoletes branch MCOL-1201

This commit is contained in:
David Hall
2018-05-11 09:50:10 -05:00
parent 12b1d99f51
commit 6fa7dded6f
30 changed files with 2255 additions and 1196 deletions

View File

@ -98,36 +98,6 @@ AggregateColumn::AggregateColumn(const uint32_t sessionID):
{
}
/**
 * Build an aggregate column over one already-constructed parameter column.
 *
 * @param aggOp     aggregate operation code, stored in fAggOp
 * @param parm      parameter column handed to fFunctionParms via reset();
 *                  may be null, in which case the argument text in fData is
 *                  left empty
 * @param sessionID forwarded to the ReturnedColumn base
 */
AggregateColumn::AggregateColumn(const AggOp aggOp, ReturnedColumn* parm, const uint32_t sessionID):
    ReturnedColumn(sessionID),
    fAggOp(aggOp),
    fAsc(false)
{
    // The data string used to be initialized as `aggOp + "(" + ...`. With an
    // integral/enum aggOp that expression offsets the address of the "("
    // literal (pointer arithmetic) instead of concatenating, so any op code
    // above 1 read outside the literal. Format the op code explicitly.
    // NOTE(review): the numeric code is emitted because no op-code-to-name
    // helper is visible from here; swap in the name if one exists.
    std::ostringstream data;
    data << static_cast<int>(aggOp) << "(";

    // A null fFunctionParms is handled elsewhere in this class (toString and
    // serialize both test for it), so tolerate a null parm here as well.
    if (parm != NULL)
        data << parm->data();

    data << ")";
    fData = data.str();

    fFunctionParms.reset(parm);
}
/**
 * Build an aggregate column whose single parameter is given as expression text.
 *
 * @param aggOp     aggregate operation code, stored in fAggOp
 * @param content   expression text; wrapped in a new ArithmeticColumn that
 *                  becomes the function parameter
 * @param sessionID forwarded to the ReturnedColumn base
 */
AggregateColumn::AggregateColumn(const AggOp aggOp, const string& content, const uint32_t sessionID):
    ReturnedColumn(sessionID),
    fAggOp(aggOp),
    fAsc(false)
{
    // The data string used to be initialized as `aggOp + "(" + content + ")"`.
    // With an integral/enum aggOp the first `+` offsets the address of the "("
    // literal (pointer arithmetic) instead of concatenating, so any op code
    // above 1 read outside the literal. Format the op code explicitly.
    // NOTE(review): the numeric code is emitted because no op-code-to-name
    // helper is visible from here; swap in the name if one exists.
    std::ostringstream data;
    data << static_cast<int>(aggOp) << "(" << content << ")";
    fData = data.str();

    // TODO: need to handle distinct
    fFunctionParms.reset(new ArithmeticColumn(content));
}
// Deprecated constructor: the aggregate is identified by its function name
// (as a string) and fAggOp is left at NOOP. parm is handed to fFunctionParms
// via reset().
AggregateColumn::AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID):
    ReturnedColumn(sessionID),
    fFunctionName(functionName),
    fAggOp(NOOP),
    fAsc(false)
{
    // Assemble "<functionName>(<parm data>)" piece by piece. parm is read
    // before it is handed over below, matching the original evaluation order.
    fData = functionName;
    fData += "(";
    fData += parm->data();
    fData += ")";

    fFunctionParms.reset(parm);
}
// deprecated constructor. use function name as string
AggregateColumn::AggregateColumn(const string& functionName, const string& content, const uint32_t sessionID):
ReturnedColumn(sessionID),
@ -137,20 +107,21 @@ AggregateColumn::AggregateColumn(const string& functionName, const string& conte
fData(functionName + "(" + content + ")")
{
// TODO: need to handle distinct
fFunctionParms.reset(new ArithmeticColumn(content));
SRCP srcp(new ArithmeticColumn(content));
fAggParms.push_back(srcp);
}
/**
 * Copy constructor with an explicit session id.
 *
 * @param rhs       column to copy from
 * @param sessionID forwarded, together with rhs, to the ReturnedColumn base
 *
 * Members are copied one by one from rhs. Some are read directly
 * (fFunctionName, fAggOp, fFunctionParms, fConstCol, fAggParms) and others
 * through rhs's accessors (tableAlias(), asc(), data(), alias()).
 */
AggregateColumn::AggregateColumn( const AggregateColumn& rhs, const uint32_t sessionID ):
ReturnedColumn(rhs, sessionID),
fFunctionName (rhs.fFunctionName),
fAggOp(rhs.fAggOp),
fFunctionParms(rhs.fFunctionParms),
fTableAlias(rhs.tableAlias()),
fAsc(rhs.asc()),
fData(rhs.data()),
fConstCol(rhs.fConstCol)
{
// fAlias is not part of the initializer list above, so it is assigned here.
fAlias = rhs.alias();
// Vector assignment copies each SRCP element.
// NOTE(review): SRCP is presumably a shared pointer, so the copy would share
// the parameter objects with rhs rather than clone them -- confirm.
fAggParms = rhs.fAggParms;
}
/**
@ -166,10 +137,14 @@ const string AggregateColumn::toString() const
if (fAlias.length() > 0) output << "/Alias: " << fAlias << endl;
if (fFunctionParms == 0)
output << "No arguments" << endl;
if (fAggParms.size() == 0)
output << "No arguments";
else
output << *fFunctionParms << endl;
for (uint32_t i = 0; i < fAggParms.size(); ++i)
{
output << *(fAggParms[i]) << " ";
}
output << endl;
if (fConstCol)
output << *fConstCol;
@ -191,10 +166,11 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const
b << fFunctionName;
b << static_cast<uint8_t>(fAggOp);
if (fFunctionParms == 0)
b << (uint8_t) ObjectReader::NULL_CLASS;
else
fFunctionParms->serialize(b);
b << static_cast<uint32_t>(fAggParms.size());
for (uint32_t i = 0; i < fAggParms.size(); ++i)
{
fAggParms[i]->serialize(b);
}
b << static_cast<uint32_t>(fGroupByColList.size());
@ -219,20 +195,26 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const
void AggregateColumn::unserialize(messageqcpp::ByteStream& b)
{
ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN);
fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end());
fProjectColList.erase(fProjectColList.begin(), fProjectColList.end());
ReturnedColumn::unserialize(b);
b >> fFunctionName;
b >> fAggOp;
//delete fFunctionParms;
fFunctionParms.reset(
dynamic_cast<ReturnedColumn*>(ObjectReader::createTreeNode(b)));
messageqcpp::ByteStream::quadbyte size;
messageqcpp::ByteStream::quadbyte i;
ReturnedColumn* rc;
ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN);
fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end());
fProjectColList.erase(fProjectColList.begin(), fProjectColList.end());
fAggParms.erase(fAggParms.begin(), fAggParms.end());
ReturnedColumn::unserialize(b);
b >> fFunctionName;
b >> fAggOp;
b >> size;
for (i = 0; i < size; i++)
{
rc = dynamic_cast<ReturnedColumn*>(ObjectReader::createTreeNode(b));
SRCP srcp(rc);
fAggParms.push_back(srcp);
}
b >> size;
for (i = 0; i < size; i++)
@ -261,6 +243,7 @@ void AggregateColumn::unserialize(messageqcpp::ByteStream& b)
bool AggregateColumn::operator==(const AggregateColumn& t) const
{
const ReturnedColumn* rc1, *rc2;
AggParms::const_iterator it, it2;
rc1 = static_cast<const ReturnedColumn*>(this);
rc2 = static_cast<const ReturnedColumn*>(&t);
@ -277,16 +260,18 @@ bool AggregateColumn::operator==(const AggregateColumn& t) const
if (fAggOp != t.fAggOp)
return false;
if (fFunctionParms.get() != NULL && t.fFunctionParms.get() != NULL)
if (aggParms().size() != t.aggParms().size())
{
if (*fFunctionParms.get() != t.fFunctionParms.get())
return false;
}
for (it = fAggParms.begin(), it2 = t.fAggParms.begin();
it != fAggParms.end();
++it, ++it2)
{
if (**it != **it2)
return false;
}
else if (fFunctionParms.get() != NULL || t.fFunctionParms.get() != NULL)
return false;
//if (fAlias != t.fAlias)
// return false;
if (fTableAlias != t.fTableAlias)
return false;
@ -645,3 +630,4 @@ AggregateColumn::AggOp AggregateColumn::agname2num(const string& agname)
}
} // namespace execplan

View File

@ -40,6 +40,8 @@ class ByteStream;
namespace execplan
{
typedef std::vector<execplan::SRCP> AggParms;
/**
* @brief A class to represent an aggregate return column
*
@ -74,7 +76,8 @@ public:
BIT_OR,
BIT_XOR,
GROUP_CONCAT,
UDAF
UDAF,
MULTI_PARM
};
/**
@ -94,21 +97,6 @@ public:
*/
AggregateColumn(const uint32_t sessionID);
/**
* ctor
*/
AggregateColumn(const AggOp aggop, ReturnedColumn* parm, const uint32_t sessionID = 0);
/**
* ctor
*/
AggregateColumn(const AggOp aggop, const std::string& content, const uint32_t sessionID = 0);
/**
* ctor
*/
AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID = 0);
/**
* ctor
*/
@ -155,24 +143,27 @@ public:
fAggOp = aggOp;
}
/** get function parms
*
* get the function parms from this object
*/
virtual const SRCP functionParms() const
virtual AggParms& aggParms()
{
return fFunctionParms;
return fAggParms;
}
virtual const AggParms& aggParms() const
{
return fAggParms;
}
/** set function parms
*
* set the function parms for this object
*/
virtual void functionParms(const SRCP& functionParms)
virtual void aggParms(const AggParms& parms)
{
fFunctionParms = functionParms;
fAggParms = parms;
}
/** return a copy of this pointer
*
* deep copy of this pointer and return the copy
@ -325,9 +316,10 @@ protected:
uint8_t fAggOp;
/**
* A ReturnedColumn objects that are the arguments to this function
* ReturnedColumn objects that are the arguments to this
* function
*/
SRCP fFunctionParms;
AggParms fAggParms;
/** table alias
* A string to represent table alias name which contains this column

View File

@ -56,6 +56,17 @@ using namespace rowgroup;
namespace joblist
{
/**
 * Default constructor.
 *
 * No JobStep initializer is given, so the base is default-constructed; the
 * ExpressionStep(const JobInfo&) overload passes its jobInfo to JobStep
 * instead. Every member named below starts out in a cleared state: null
 * filter, expression id -1, join id 0 and all flags false.
 * NOTE(review): -1 presumably means "no expression id assigned yet" -- confirm.
 */
ExpressionStep::ExpressionStep() :
fExpressionFilter(NULL),
fExpressionId(-1),
fVarBinOK(false),
fSelectFilter(false),
fAssociatedJoinId(0),
fDoJoin(false),
fVirtual(false)
{
}
ExpressionStep::ExpressionStep(const JobInfo& jobInfo) :
JobStep(jobInfo),
fExpressionFilter(NULL),
@ -68,7 +79,6 @@ ExpressionStep::ExpressionStep(const JobInfo& jobInfo) :
{
}
ExpressionStep::ExpressionStep(const ExpressionStep& rhs) :
JobStep(rhs),
fExpression(rhs.expression()),

View File

@ -50,6 +50,7 @@ class ExpressionStep : public JobStep
{
public:
// constructors
ExpressionStep();
ExpressionStep(const JobInfo&);
// destructor
virtual ~ExpressionStep();

View File

@ -78,7 +78,7 @@ void GroupConcatInfo::prepGroupConcat(JobInfo& jobInfo)
while (i != jobInfo.groupConcatCols.end())
{
GroupConcatColumn* gcc = dynamic_cast<GroupConcatColumn*>(i->get());
const RowColumn* rcp = dynamic_cast<const RowColumn*>(gcc->functionParms().get());
const RowColumn* rcp = dynamic_cast<const RowColumn*>(gcc->aggParms()[0].get());
SP_GroupConcat groupConcat(new GroupConcat);
groupConcat->fSeparator = gcc->separator();

View File

@ -18,7 +18,6 @@
// $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $
#include <iostream>
#include <stack>
#include <iterator>
@ -870,7 +869,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
if (gcc != NULL)
{
srcp = gcc->functionParms();
srcp = gcc->aggParms()[0];
const RowColumn* rcp = dynamic_cast<const RowColumn*>(srcp.get());
const vector<SRCP>& cols = rcp->columnVec();
@ -891,21 +890,55 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
continue;
}
#if 0
// MCOL-1201 Add support for multi-parameter UDAnF
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(retCols[i].get());
if (udafc != NULL)
{
srcp = udafc->aggParms()[0];
const RowColumn* rcp = dynamic_cast<const RowColumn*>(srcp.get());
const vector<SRCP>& cols = rcp->columnVec();
for (vector<SRCP>::const_iterator j = cols.begin(); j != cols.end(); j++)
{
srcp = *j;
if (dynamic_cast<const ConstantColumn*>(srcp.get()) == NULL)
retCols.push_back(srcp);
// Do we need this?
const ArithmeticColumn* ac = dynamic_cast<const ArithmeticColumn*>(srcp.get());
const FunctionColumn* fc = dynamic_cast<const FunctionColumn*>(srcp.get());
if (ac != NULL || fc != NULL)
{
// bug 3728, make a dummy expression step for each expression.
scoped_ptr<ExpressionStep> es(new ExpressionStep(jobInfo));
es->expression(srcp, jobInfo);
}
}
continue;
}
#endif
srcp = retCols[i];
const AggregateColumn* ag = dynamic_cast<const AggregateColumn*>(retCols[i].get());
if (ag != NULL)
srcp = ag->functionParms();
const ArithmeticColumn* ac = dynamic_cast<const ArithmeticColumn*>(srcp.get());
const FunctionColumn* fc = dynamic_cast<const FunctionColumn*>(srcp.get());
if (ac != NULL || fc != NULL)
// bug 3728 Make a dummy expression for srcp if it is an
// expression. This is needed to fill in some stuff.
// Note that es.expression does nothing if the item is not an expression.
if (ag == NULL)
{
// bug 3728, make a dummy expression step for each expression.
scoped_ptr<ExpressionStep> es(new ExpressionStep(jobInfo));
es->expression(srcp, jobInfo);
// Not an aggregate. Make a dummy expression for the item
ExpressionStep es;
es.expression(srcp, jobInfo);
}
else
{
// MCOL-1201 multi-argument aggregate. make a dummy expression
// step for each argument that is an expression.
for (uint32_t i = 0; i < ag->aggParms().size(); ++i)
{
srcp = ag->aggParms()[i];
ExpressionStep es;
es.expression(srcp, jobInfo);
}
}
}
@ -915,17 +948,18 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
{
srcp = retCols[i];
const SimpleColumn* sc = dynamic_cast<const SimpleColumn*>(srcp.get());
AggregateColumn* aggc = dynamic_cast<AggregateColumn*>(srcp.get());
bool doDistinct = (csep->distinct() && csep->groupByCols().empty());
uint32_t tupleKey = -1;
string alias;
string view;
// returned column could be groupby column, a simplecoulumn not a agregatecolumn
// returned column could be groupby column, a simplecoulumn not an aggregatecolumn
int op = 0;
CalpontSystemCatalog::OID dictOid = 0;
CalpontSystemCatalog::ColType ct, aggCt;
if (sc == NULL)
if (aggc)
{
GroupConcatColumn* gcc = dynamic_cast<GroupConcatColumn*>(retCols[i].get());
@ -939,7 +973,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
tupleKey = ti.key;
jobInfo.returnedColVec.push_back(make_pair(tupleKey, gcc->aggOp()));
// not a tokenOnly column. Mark all the columns involved
srcp = gcc->functionParms();
srcp = gcc->aggParms()[0];
const RowColumn* rowCol = dynamic_cast<const RowColumn*>(srcp.get());
if (rowCol)
@ -963,186 +997,353 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
continue;
}
AggregateColumn* ac = dynamic_cast<AggregateColumn*>(retCols[i].get());
if (ac != NULL)
else
{
srcp = ac->functionParms();
sc = dynamic_cast<const SimpleColumn*>(srcp.get());
// Aggregate column not group concat
AggParms& aggParms = aggc->aggParms();
if (ac->constCol().get() != NULL)
for (uint32_t parm = 0; parm < aggParms.size(); ++parm)
{
// replace the aggregate on constant with a count(*)
SRCP clone;
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(ac);
if (udafc)
if (aggc->constCol().get() != NULL)
{
clone.reset(new UDAFColumn(*udafc, ac->sessionID()));
// replace the aggregate on constant with a count(*)
SRCP clone;
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(aggc);
if (udafc)
{
clone.reset(new UDAFColumn(*udafc, aggc->sessionID()));
}
else
{
clone.reset(new AggregateColumn(*aggc, aggc->sessionID()));
}
jobInfo.constAggregate.insert(make_pair(i, clone));
aggc->aggOp(AggregateColumn::COUNT_ASTERISK);
aggc->distinct(false);
}
srcp = aggParms[parm];
sc = dynamic_cast<const SimpleColumn*>(srcp.get());
if (parm == 0)
{
op = aggc->aggOp();
}
else
{
clone.reset(new AggregateColumn(*ac, ac->sessionID()));
op = AggregateColumn::MULTI_PARM;
}
doDistinct = aggc->distinct();
if (aggParms.size() == 1)
{
// Set the col type based on the single parm.
// Changing col type based on a parm if multiple parms
// doesn't really make sense.
updateAggregateColType(aggc, srcp, op, jobInfo);
}
aggCt = aggc->resultType();
// As of bug3695, make sure varbinary is not used in aggregation.
// TODO: allow for UDAF
if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY)
throw runtime_error ("VARBINARY in aggregate function is not supported.");
// Project the parm columns or expressions
if (sc != NULL)
{
CalpontSystemCatalog::OID retOid = sc->oid();
CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc);
alias = extractTableAlias(sc);
view = sc->viewName();
if (!sc->schemaName().empty())
{
ct = sc->colType();
//XXX use this before connector sets colType in sc correctly.
if (sc->isInfiniDB() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
ct = jobInfo.csc->colType(sc->oid());
//X
dictOid = isDictCol(ct);
}
else
{
retOid = (tblOid + 1) + sc->colPosition();
ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")];
}
TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias));
tupleKey = ti.key;
// this is a string column
if (dictOid > 0)
{
map<uint32_t, bool>::iterator findit = jobInfo.tokenOnly.find(tupleKey);
// if the column has never seen, and the op is count: possible need count only.
if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op)
{
if (findit == jobInfo.tokenOnly.end())
jobInfo.tokenOnly[tupleKey] = true;
}
// if aggregate other than count, token is not enough.
else if (op != 0 || doDistinct)
{
jobInfo.tokenOnly[tupleKey] = false;
}
findit = jobInfo.tokenOnly.find(tupleKey);
if (!(findit != jobInfo.tokenOnly.end() && findit->second == true))
{
dictMap[tupleKey] = dictOid;
jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid;
ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias);
jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key;
}
}
}
else
{
const ArithmeticColumn* ac = NULL;
const FunctionColumn* fc = NULL;
const WindowFunctionColumn* wc = NULL;
bool hasAggCols = false;
if ((ac = dynamic_cast<const ArithmeticColumn*>(srcp.get())) != NULL)
{
if (ac->aggColumnList().size() > 0)
hasAggCols = true;
}
else if ((fc = dynamic_cast<const FunctionColumn*>(srcp.get())) != NULL)
{
if (fc->aggColumnList().size() > 0)
hasAggCols = true;
}
else if (dynamic_cast<const AggregateColumn*>(srcp.get()) != NULL)
{
std::ostringstream errmsg;
errmsg << "Invalid aggregate function nesting.";
cerr << boldStart << errmsg.str() << boldStop << endl;
throw logic_error(errmsg.str());
}
else if (dynamic_cast<const ConstantColumn*>(srcp.get()) != NULL)
{
}
else if ((wc = dynamic_cast<const WindowFunctionColumn*>(srcp.get())) == NULL)
{
std::ostringstream errmsg;
errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name();
cerr << boldStart << errmsg.str() << boldStop << endl;
throw logic_error(errmsg.str());
}
uint64_t eid = srcp.get()->expressionId();
ct = srcp.get()->resultType();
TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo));
tupleKey = ti.key;
if (hasAggCols)
jobInfo.expressionVec.push_back(tupleKey);
}
jobInfo.constAggregate.insert(make_pair(i, clone));
ac->aggOp(AggregateColumn::COUNT_ASTERISK);
ac->distinct(false);
}
// add to project list
vector<uint32_t>::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey);
op = ac->aggOp();
doDistinct = ac->distinct();
updateAggregateColType(ac, srcp, op, jobInfo);
aggCt = ac->resultType();
if (keyIt == projectKeys.end())
{
RetColsVector::iterator it = pcv.end();
// As of bug3695, make sure varbinary is not used in aggregation.
if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY)
throw runtime_error ("VARBINARY in aggregate function is not supported.");
}
}
if (doDistinct)
it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp);
else
it = pcv.insert(pcv.end(), srcp);
// simple column selected or aggregated
if (sc != NULL)
{
// one column only need project once
CalpontSystemCatalog::OID retOid = sc->oid();
CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc);
alias = extractTableAlias(sc);
view = sc->viewName();
projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey);
}
else if (doDistinct) // @bug4250, move forward distinct column if necessary.
{
uint32_t pos = distance(projectKeys.begin(), keyIt);
if (!sc->schemaName().empty())
{
ct = sc->colType();
if (pos >= lastGroupByPos)
{
pcv[pos] = pcv[lastGroupByPos];
pcv[lastGroupByPos] = srcp;
projectKeys[pos] = projectKeys[lastGroupByPos];
projectKeys[lastGroupByPos] = tupleKey;
lastGroupByPos++;
}
}
//XXX use this before connector sets colType in sc correctly.
if (sc->isInfiniDB() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
ct = jobInfo.csc->colType(sc->oid());
if (doDistinct && dictOid > 0)
tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey];
//X
dictOid = isDictCol(ct);
}
else
{
retOid = (tblOid + 1) + sc->colPosition();
ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")];
}
// remember the columns to be returned
jobInfo.returnedColVec.push_back(make_pair(tupleKey, op));
TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias));
tupleKey = ti.key;
if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG)
jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale;
// this is a string column
if (dictOid > 0)
{
map<uint32_t, bool>::iterator findit = jobInfo.tokenOnly.find(tupleKey);
// if the column has never seen, and the op is count: possible need count only.
if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op)
{
if (findit == jobInfo.tokenOnly.end())
jobInfo.tokenOnly[tupleKey] = true;
}
// if aggregate other than count, token is not enough.
else if (op != 0 || doDistinct)
{
jobInfo.tokenOnly[tupleKey] = false;
}
findit = jobInfo.tokenOnly.find(tupleKey);
if (!(findit != jobInfo.tokenOnly.end() && findit->second == true))
{
dictMap[tupleKey] = dictOid;
jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid;
ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias);
jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key;
// bug 1499 distinct processing, save unique distinct columns
if (doDistinct &&
(jobInfo.distinctColVec.end() ==
find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey)))
{
jobInfo.distinctColVec.push_back(tupleKey);
}
}
}
}
else
{
const ArithmeticColumn* ac = NULL;
const FunctionColumn* fc = NULL;
const WindowFunctionColumn* wc = NULL;
bool hasAggCols = false;
if ((ac = dynamic_cast<const ArithmeticColumn*>(srcp.get())) != NULL)
// Not an Aggregate
// simple column selected
if (sc != NULL)
{
if (ac->aggColumnList().size() > 0)
hasAggCols = true;
// one column only need project once
CalpontSystemCatalog::OID retOid = sc->oid();
CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc);
alias = extractTableAlias(sc);
view = sc->viewName();
if (!sc->schemaName().empty())
{
ct = sc->colType();
//XXX use this before connector sets colType in sc correctly.
if (sc->isInfiniDB() && dynamic_cast<const PseudoColumn*>(sc) == NULL)
ct = jobInfo.csc->colType(sc->oid());
//X
dictOid = isDictCol(ct);
}
else
{
retOid = (tblOid + 1) + sc->colPosition();
ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")];
}
TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias));
tupleKey = ti.key;
// this is a string column
if (dictOid > 0)
{
map<uint32_t, bool>::iterator findit = jobInfo.tokenOnly.find(tupleKey);
// if the column has never seen, and the op is count: possible need count only.
if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op)
{
if (findit == jobInfo.tokenOnly.end())
jobInfo.tokenOnly[tupleKey] = true;
}
// if aggregate other than count, token is not enough.
else if (op != 0 || doDistinct)
{
jobInfo.tokenOnly[tupleKey] = false;
}
findit = jobInfo.tokenOnly.find(tupleKey);
if (!(findit != jobInfo.tokenOnly.end() && findit->second == true))
{
dictMap[tupleKey] = dictOid;
jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid;
ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias);
jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key;
}
}
}
else if ((fc = dynamic_cast<const FunctionColumn*>(srcp.get())) != NULL)
{
if (fc->aggColumnList().size() > 0)
hasAggCols = true;
}
else if (dynamic_cast<const AggregateColumn*>(srcp.get()) != NULL)
{
std::ostringstream errmsg;
errmsg << "Invalid aggregate function nesting.";
cerr << boldStart << errmsg.str() << boldStop << endl;
throw logic_error(errmsg.str());
}
else if ((wc = dynamic_cast<const WindowFunctionColumn*>(srcp.get())) == NULL)
{
std::ostringstream errmsg;
errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name();
cerr << boldStart << errmsg.str() << boldStop << endl;
throw logic_error(errmsg.str());
}
uint64_t eid = srcp.get()->expressionId();
ct = srcp.get()->resultType();
TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo));
tupleKey = ti.key;
if (hasAggCols)
jobInfo.expressionVec.push_back(tupleKey);
}
// add to project list
vector<uint32_t>::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey);
if (keyIt == projectKeys.end())
{
RetColsVector::iterator it = pcv.end();
if (doDistinct)
it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp);
else
it = pcv.insert(pcv.end(), srcp);
projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey);
}
else if (doDistinct) // @bug4250, move forward distinct column if necessary.
{
uint32_t pos = distance(projectKeys.begin(), keyIt);
if (pos >= lastGroupByPos)
{
pcv[pos] = pcv[lastGroupByPos];
pcv[lastGroupByPos] = srcp;
projectKeys[pos] = projectKeys[lastGroupByPos];
projectKeys[lastGroupByPos] = tupleKey;
lastGroupByPos++;
const ArithmeticColumn* ac = NULL;
const FunctionColumn* fc = NULL;
const WindowFunctionColumn* wc = NULL;
bool hasAggCols = false;
if ((ac = dynamic_cast<const ArithmeticColumn*>(srcp.get())) != NULL)
{
if (ac->aggColumnList().size() > 0)
hasAggCols = true;
}
else if ((fc = dynamic_cast<const FunctionColumn*>(srcp.get())) != NULL)
{
if (fc->aggColumnList().size() > 0)
hasAggCols = true;
}
else if (dynamic_cast<const AggregateColumn*>(srcp.get()) != NULL)
{
std::ostringstream errmsg;
errmsg << "Invalid aggregate function nesting.";
cerr << boldStart << errmsg.str() << boldStop << endl;
throw logic_error(errmsg.str());
}
else if (dynamic_cast<const ConstantColumn*>(srcp.get()) != NULL)
{
}
else if ((wc = dynamic_cast<const WindowFunctionColumn*>(srcp.get())) == NULL)
{
std::ostringstream errmsg;
errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name();
cerr << boldStart << errmsg.str() << boldStop << endl;
throw logic_error(errmsg.str());
}
uint64_t eid = srcp.get()->expressionId();
ct = srcp.get()->resultType();
TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo));
tupleKey = ti.key;
if (hasAggCols)
jobInfo.expressionVec.push_back(tupleKey);
}
}
if (doDistinct && dictOid > 0)
tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey];
// add to project list
vector<uint32_t>::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey);
// remember the columns to be returned
jobInfo.returnedColVec.push_back(make_pair(tupleKey, op));
if (keyIt == projectKeys.end())
{
RetColsVector::iterator it = pcv.end();
if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG)
jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale;
if (doDistinct)
it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp);
else
it = pcv.insert(pcv.end(), srcp);
// bug 1499 distinct processing, save unique distinct columns
if (doDistinct &&
(jobInfo.distinctColVec.end() ==
find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey)))
{
jobInfo.distinctColVec.push_back(tupleKey);
projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey);
}
else if (doDistinct) // @bug4250, move forward distinct column if necessary.
{
uint32_t pos = distance(projectKeys.begin(), keyIt);
if (pos >= lastGroupByPos)
{
pcv[pos] = pcv[lastGroupByPos];
pcv[lastGroupByPos] = srcp;
projectKeys[pos] = projectKeys[lastGroupByPos];
projectKeys[lastGroupByPos] = tupleKey;
lastGroupByPos++;
}
}
if (doDistinct && dictOid > 0)
tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey];
// remember the columns to be returned
jobInfo.returnedColVec.push_back(make_pair(tupleKey, op));
if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG)
jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale;
// bug 1499 distinct processing, save unique distinct columns
if (doDistinct &&
(jobInfo.distinctColVec.end() ==
find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey)))
{
jobInfo.distinctColVec.push_back(tupleKey);
}
}
}

View File

@ -164,6 +164,9 @@ inline RowAggFunctionType functionIdMap(int planFuncId)
case AggregateColumn::UDAF:
return ROWAGG_UDAF;
case AggregateColumn::MULTI_PARM:
return ROWAGG_MULTI_PARM;
default:
return ROWAGG_FUNCT_UNDEFINE;
}
@ -1302,7 +1305,7 @@ void TupleAggregateStep::prep1PhaseAggregate(
if (it == jobInfo.projectionCols.end())
{
throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s");
throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s");
}
}
else
@ -1468,7 +1471,7 @@ void TupleAggregateStep::prep1PhaseAggregate(
if (!udafFuncCol)
{
throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol");
throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol");
}
pUDAFFunc = udafFuncCol->fUDAFContext.getFunction();
@ -1483,6 +1486,17 @@ void TupleAggregateStep::prep1PhaseAggregate(
break;
}
case ROWAGG_MULTI_PARM:
{
oidsAgg.push_back(oidsProj[colProj]);
keysAgg.push_back(key);
scaleAgg.push_back(scaleProj[colProj]);
precisionAgg.push_back(precisionProj[colProj]);
typeAgg.push_back(typeProj[colProj]);
widthAgg.push_back(width[colProj]);
}
break;
default:
{
ostringstream emsg;
@ -1560,7 +1574,7 @@ void TupleAggregateStep::prep1PhaseAggregate(
if (!udafFuncCol)
{
throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol");
throw logic_error("(3)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol");
}
functionVec[i]->fAuxColumnIndex = lastCol++;
@ -1675,7 +1689,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
// the groupby columns are put in front, even not a returned column
// sum and count(column name) are omitted, if avg present
{
// project only uniq oids, but they may be repeated in aggregation
// project only unique oids, but they may be repeated in aggregation
// collect the projected column info, prepare for aggregation
map<uint32_t, int> projColPosMap;
@ -1848,7 +1862,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
if (it == jobInfo.projectionCols.end())
{
throw logic_error("prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s");
throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s");
}
}
else
@ -2043,7 +2057,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
if (!udafFuncCol)
{
throw logic_error("prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol");
throw logic_error("(2)prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol");
}
// Return column
@ -2065,6 +2079,18 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
break;
}
case ROWAGG_MULTI_PARM:
{
oidsAgg.push_back(oidsProj[colProj]);
keysAgg.push_back(aggKey);
scaleAgg.push_back(scaleProj[colProj]);
precisionAgg.push_back(precisionProj[colProj]);
typeAgg.push_back(typeProj[colProj]);
widthAgg.push_back(widthProj[colProj]);
++colAgg;
}
break;
default:
{
ostringstream emsg;
@ -2111,7 +2137,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
groupByNoDist.push_back(groupby);
aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i));
}
projColsUDAFIndex = 0;
// locate the return column position in aggregated rowgroup
for (uint64_t i = 0; i < returnedColVec.size(); i++)
{
@ -2121,6 +2148,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second);
int colAgg = -1;
if (aggOp == ROWAGG_UDAF)
{
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(jobInfo.projectionCols[i].get());
if (udafc)
pUDAFFunc = udafc->getContext().getFunction();
}
if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) !=
jobInfo.distinctColVec.end() )
{
@ -2432,11 +2467,37 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
new RowAggFunctionCol(
ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex)));
}
// update the aggregate function vector
else
{
SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colAgg, i));
// update the aggregate function vector
SP_ROWAGG_FUNC_t funct;
if (aggOp == ROWAGG_UDAF)
{
std::vector<SRCP>::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex;
for (; it != jobInfo.projectionCols.end(); it++)
{
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>((*it).get());
projColsUDAFIndex++;
if (udafc)
{
pUDAFFunc = udafc->getContext().getFunction();
// Create a RowAggFunctionCol (UDAF subtype) with the context.
funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i));
break;
}
}
if (it == jobInfo.projectionCols.end())
{
throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s");
}
}
else
{
funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, i));
}
if (aggOp == ROWAGG_COUNT_NO_OP)
funct->fAuxColumnIndex = colAgg;
@ -2549,7 +2610,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
if (!udafFuncCol)
{
throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol");
throw logic_error("(4)prep1PhaseDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol");
}
functionVec2[i]->fAuxColumnIndex = lastCol++;
@ -2893,7 +2954,7 @@ void TupleAggregateStep::prep2PhasesAggregate(
// the groupby columns are put in front, even not a returned column
// sum and count(column name) are omitted, if avg present
{
// project only uniq oids, but they may be repeated in aggregation
// project only unique oids, but they may be repeated in aggregation
// collect the projected column info, prepare for aggregation
vector<uint32_t> width;
map<uint32_t, int> projColPosMap;
@ -3036,12 +3097,11 @@ void TupleAggregateStep::prep2PhasesAggregate(
funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm));
break;
}
}
if (it == jobInfo.projectionCols.end())
{
throw logic_error("prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s");
throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s");
}
}
else
@ -3240,7 +3300,7 @@ void TupleAggregateStep::prep2PhasesAggregate(
if (!udafFuncCol)
{
throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol");
throw logic_error("(2)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol");
}
oidsAggPm.push_back(oidsProj[colProj]);
@ -3261,6 +3321,18 @@ void TupleAggregateStep::prep2PhasesAggregate(
break;
}
case ROWAGG_MULTI_PARM:
{
oidsAggPm.push_back(oidsProj[colProj]);
keysAggPm.push_back(aggKey);
scaleAggPm.push_back(scaleProj[colProj]);
precisionAggPm.push_back(precisionProj[colProj]);
typeAggPm.push_back(typeProj[colProj]);
widthAggPm.push_back(width[colProj]);
colAggPm++;
}
break;
default:
{
ostringstream emsg;
@ -3278,11 +3350,16 @@ void TupleAggregateStep::prep2PhasesAggregate(
// add back sum or count(column name) if omitted due to avg column
// put count(column name) column to the end, if it is for avg only
{
// Keep a count of the parms after the first for any aggregate.
// These will be skipped and the count needs to be subtracted
// from where the aux column will be.
int64_t multiParms = 0;
// check if the count column for AVG is also a returned column,
// if so, replace the "-1" to actual position in returned vec.
map<uint32_t, SP_ROWAGG_FUNC_t> avgFuncMap;
AGG_MAP aggDupFuncMap;
projColsUDAFIndex = 0;
// copy over the groupby vector
// update the outputColumnIndex if returned
for (uint64_t i = 0; i < groupByPm.size(); i++)
@ -3299,7 +3376,14 @@ void TupleAggregateStep::prep2PhasesAggregate(
RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second);
int colPm = -1;
if (aggOp == ROWAGG_MULTI_PARM)
{
// Skip on UM: Extra parms for an aggregate have no work on the UM
++multiParms;
continue;
}
// Is this a UDAF? use the function as part of the key.
mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL;
if (aggOp == ROWAGG_UDAF)
@ -3452,20 +3536,36 @@ void TupleAggregateStep::prep2PhasesAggregate(
functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol(
ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex)));
}
// update the aggregate function vector
else
{
// update the aggregate function vector
SP_ROWAGG_FUNC_t funct;
if (aggOp == ROWAGG_UDAF)
{
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(jobInfo.projectionCols[i].get());
funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i));
std::vector<SRCP>::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex;
for (; it != jobInfo.projectionCols.end(); it++)
{
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>((*it).get());
projColsUDAFIndex++;
if (udafc)
{
pUDAFFunc = udafc->getContext().getFunction();
// Create a RowAggFunctionCol (UDAF subtype) with the context.
funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms));
break;
}
}
if (it == jobInfo.projectionCols.end())
{
throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s");
}
}
else
{
funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i));
funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms));
}
if (aggOp == ROWAGG_COUNT_NO_OP)
@ -3517,7 +3617,7 @@ void TupleAggregateStep::prep2PhasesAggregate(
}
// there is avg(k), but no count(k) in the select list
uint64_t lastCol = returnedColVec.size();
uint64_t lastCol = returnedColVec.size() - multiParms;
for (map<uint32_t, SP_ROWAGG_FUNC_t>::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++)
{
@ -3545,7 +3645,7 @@ void TupleAggregateStep::prep2PhasesAggregate(
if (!udafFuncCol)
{
throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol");
throw logic_error("(4)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol");
}
functionVecUm[i]->fAuxColumnIndex = lastCol++;
@ -3691,6 +3791,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
vector<SP_ROWAGG_GRPBY_t> groupByPm, groupByUm, groupByNoDist;
vector<SP_ROWAGG_FUNC_t> functionVecPm, functionNoDistVec, functionVecUm;
list<uint32_t> multiParmIndexes;
uint32_t bigIntWidth = sizeof(int64_t);
map<pair<uint32_t, int>, uint64_t> avgFuncDistMap;
@ -3702,7 +3803,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
// the groupby columns are put in front, even not a returned column
// sum and count(column name) are omitted, if avg present
{
// project only uniq oids, but they may be repeated in aggregation
// project only unique oids, but they may be repeated in aggregation
// collect the projected column info, prepare for aggregation
vector<uint32_t> width;
map<uint32_t, int> projColPosMap;
@ -3856,7 +3957,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
if (it == jobInfo.projectionCols.end())
{
throw logic_error("prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s");
throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s");
}
}
else
@ -4050,7 +4151,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
if (!udafFuncCol)
{
throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol");
throw logic_error("(2)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol");
}
// Return column
@ -4072,6 +4173,19 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
break;
}
case ROWAGG_MULTI_PARM:
{
oidsAggPm.push_back(oidsProj[colProj]);
keysAggPm.push_back(aggKey);
scaleAggPm.push_back(scaleProj[colProj]);
precisionAggPm.push_back(precisionProj[colProj]);
typeAggPm.push_back(typeProj[colProj]);
widthAggPm.push_back(width[colProj]);
multiParmIndexes.push_back(colAggPm);
colAggPm++;
}
break;
default:
{
ostringstream emsg;
@ -4093,12 +4207,23 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
groupByUm.push_back(groupby);
}
// Keep a count of the parms after the first for any aggregate.
// These will be skipped and the count needs to be subtracted
// from where the aux column will be.
int64_t multiParms = 0;
for (uint32_t idx = 0; idx < functionVecPm.size(); idx++)
{
SP_ROWAGG_FUNC_t funct;
SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx];
// UDAF support
if (funcPm->fAggFunction == ROWAGG_MULTI_PARM)
{
// Multi-Parm is not used on the UM
++multiParms;
continue;
}
if (funcPm->fAggFunction == ROWAGG_UDAF)
{
RowUDAFFunctionCol* udafFuncCol = dynamic_cast<RowUDAFFunctionCol*>(funcPm.get());
@ -4106,7 +4231,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
udafFuncCol->fUDAFContext,
udafFuncCol->fOutputColumnIndex,
udafFuncCol->fOutputColumnIndex,
udafFuncCol->fAuxColumnIndex));
udafFuncCol->fAuxColumnIndex-multiParms));
functionNoDistVec.push_back(funct);
}
else
@ -4116,18 +4241,25 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
funcPm->fStatsFunction,
funcPm->fOutputColumnIndex,
funcPm->fOutputColumnIndex,
funcPm->fAuxColumnIndex));
funcPm->fAuxColumnIndex-multiParms));
functionNoDistVec.push_back(funct);
}
}
posAggUm = posAggPm;
oidsAggUm = oidsAggPm;
keysAggUm = keysAggPm;
scaleAggUm = scaleAggPm;
precisionAggUm = precisionAggPm;
widthAggUm = widthAggPm;
typeAggUm = typeAggPm;
// Copy over the PM arrays to the UM. Skip any that are a multi-parm entry.
for (uint32_t idx = 0; idx < oidsAggPm.size(); ++idx)
{
if (find (multiParmIndexes.begin(), multiParmIndexes.end(), idx ) != multiParmIndexes.end())
{
continue;
}
oidsAggUm.push_back(oidsAggPm[idx]);
keysAggUm.push_back(keysAggPm[idx]);
scaleAggUm.push_back(scaleAggPm[idx]);
precisionAggUm.push_back(precisionAggPm[idx]);
widthAggUm.push_back(widthAggPm[idx]);
typeAggUm.push_back(typeAggPm[idx]);
}
}
@ -4137,6 +4269,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
// add back sum or count(column name) if omitted due to avg column
// put count(column name) column to the end, if it is for avg only
{
// Keep a count of the parms after the first for any aggregate.
// These will be skipped and the count needs to be subtracted
// from where the aux column will be.
int64_t multiParms = 0;
// check if the count column for AVG is also a returned column,
// if so, replace the "-1" to actual position in returned vec.
map<uint32_t, SP_ROWAGG_FUNC_t> avgFuncMap, avgDistFuncMap;
@ -4159,6 +4295,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second);
int colUm = -1;
if (aggOp == ROWAGG_MULTI_PARM)
{
// Skip on UM: Extra parms for an aggregate have no work on the UM
++multiParms;
continue;
}
if (aggOp == ROWAGG_UDAF)
{
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(jobInfo.projectionCols[i].get());
if (udafc)
pUDAFFunc = udafc->getContext().getFunction();
}
if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) !=
jobInfo.distinctColVec.end() )
{
@ -4285,7 +4436,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
if (it != aggFuncMap.end())
{
colUm = it->second;
colUm = it->second - multiParms;
oidsAggDist.push_back(oidsAggUm[colUm]);
keysAggDist.push_back(keysAggUm[colUm]);
scaleAggDist.push_back(scaleAggUm[colUm]);
@ -4309,7 +4460,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
// false alarm
returnColMissing = false;
colUm = it->second;
colUm = it->second - multiParms;
if (aggOp == ROWAGG_SUM)
{
@ -4412,21 +4563,36 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol(
ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex)));
}
// update the aggregate function vector
else
{
// update the aggregate function vector
SP_ROWAGG_FUNC_t funct;
if (aggOp == ROWAGG_UDAF)
{
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(jobInfo.projectionCols[i].get());
pUDAFFunc = udafc->getContext().getFunction();
funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i));
std::vector<SRCP>::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex;
for (; it != jobInfo.projectionCols.end(); it++)
{
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>((*it).get());
projColsUDAFIndex++;
if (udafc)
{
pUDAFFunc = udafc->getContext().getFunction();
// Create a RowAggFunctionCol (UDAF subtype) with the context.
funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms));
break;
}
}
if (it == jobInfo.projectionCols.end())
{
throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s");
}
}
else
{
funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i));
funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms));
}
if (aggOp == ROWAGG_COUNT_NO_OP)
@ -4480,7 +4646,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
}
// there is avg(k), but no count(k) in the select list
uint64_t lastCol = returnedColVec.size();
uint64_t lastCol = returnedColVec.size() - multiParms;
for (map<uint32_t, SP_ROWAGG_FUNC_t>::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++)
{
@ -4540,7 +4706,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
if (!udafFuncCol)
{
throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol");
throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol");
}
functionVecUm[i]->fAuxColumnIndex = lastCol++;
@ -4687,6 +4853,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k));
groupBySub.push_back(groupby);
// Keep a count of the parms after the first for any aggregate.
// These will be skipped and the count needs to be subtracted
// from where the aux column will be.
int64_t multiParms = 0;
// tricky part : 2 function vectors
// -- dummy function vector for sub-aggregator, which does distinct only
// -- aggregate function on this distinct column for rowAggDist
@ -4694,6 +4865,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
for (uint64_t k = 0; k < returnedColVec.size(); k++)
{
// Skip the extra parameters of a multi-parameter aggregate and count them,
// so the aux column index built below can be shifted by multiParms.
// The test must look at the column currently being visited (index k, the
// variable of this loop over returnedColVec); indexing with i would test
// the same entry on every iteration.
if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM)
{
    ++multiParms;
    continue;
}
if (returnedColVec[k].first != distinctColKey)
continue;
@ -4715,7 +4891,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
f->fStatsFunction,
groupBySub.size() - 1,
f->fOutputColumnIndex,
f->fAuxColumnIndex));
f->fAuxColumnIndex-multiParms));
functionSub2.push_back(funct);
}
}
@ -4732,9 +4908,15 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
{
vector<SP_ROWAGG_FUNC_t> functionSub1 = functionNoDistVec;
vector<SP_ROWAGG_FUNC_t> functionSub2;
int64_t multiParms = 0;
for (uint64_t k = 0; k < returnedColVec.size(); k++)
{
if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM)
{
++multiParms;
continue;
}
// search non-distinct functions in functionVec
vector<SP_ROWAGG_FUNC_t>::iterator it = functionVecUm.begin();
@ -4752,7 +4934,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
udafFuncCol->fUDAFContext,
udafFuncCol->fInputColumnIndex,
udafFuncCol->fOutputColumnIndex,
udafFuncCol->fAuxColumnIndex));
udafFuncCol->fAuxColumnIndex-multiParms));
functionSub2.push_back(funct);
}
else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK ||
@ -4773,7 +4955,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
f->fStatsFunction,
f->fInputColumnIndex,
f->fOutputColumnIndex,
f->fAuxColumnIndex));
f->fAuxColumnIndex-multiParms));
functionSub2.push_back(funct);
}
}

View File

@ -4038,6 +4038,10 @@ ParseTree* buildParseTree(Item_func* item, gp_walk_info& gwi, bool& nonSupport)
ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
{
// MCOL-1201 For UDAnF multiple parameters
vector<SRCP> selCols;
vector<SRCP> orderCols;
if (!(gwi.thd->infinidb_vtable.cal_conn_info))
gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info());
@ -4054,6 +4058,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
// N.B. argument_count() is the # of formal parms to the agg fcn. InifniDB only supports 1 argument
// TODO: Support more than one parm
#if 0
if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC
&& isp->sum_func() != Item_sum::UDF_SUM_FUNC)
{
@ -4061,7 +4066,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG);
return NULL;
}
#endif
AggregateColumn* ac = NULL;
if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC)
@ -4084,444 +4089,509 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
{
gwi.fatalParseError = true;
gwi.parseErrorText = "Non supported aggregate type on the select clause";
if (ac)
delete ac;
return NULL;
}
// special parsing for group_concat
if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC)
try
{
Item_func_group_concat* gc = (Item_func_group_concat*)isp;
// special parsing for group_concat
if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC)
{
Item_func_group_concat* gc = (Item_func_group_concat*)isp;
vector<SRCP> orderCols;
RowColumn* rowCol = new RowColumn();
RowColumn* rowCol = new RowColumn();
vector<SRCP> selCols;
uint32_t select_ctn = gc->count_field();
ReturnedColumn* rc = NULL;
uint32_t select_ctn = gc->count_field();
ReturnedColumn* rc = NULL;
for (uint32_t i = 0; i < select_ctn; i++)
{
rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError);
if (!rc || gwi.fatalParseError)
return NULL;
selCols.push_back(SRCP(rc));
}
ORDER** order_item, **end;
for (order_item = gc->get_order(),
end = order_item + gc->order_field(); order_item < end;
order_item++)
{
Item* ord_col = *(*order_item)->item;
if (ord_col->type() == Item::INT_ITEM)
for (uint32_t i = 0; i < select_ctn; i++)
{
Item_int* id = (Item_int*)ord_col;
if (id->val_int() > (int)selCols.size())
{
gwi.fatalParseError = true;
return NULL;
}
rc = selCols[id->val_int() - 1]->clone();
rc->orderPos(id->val_int() - 1);
}
else
{
rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError);
rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError);
if (!rc || gwi.fatalParseError)
{
if (ac)
delete ac;
return NULL;
}
selCols.push_back(SRCP(rc));
}
// 10.2 TODO: direction is now a tri-state flag
rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false);
orderCols.push_back(SRCP(rc));
}
ORDER** order_item, **end;
rowCol->columnVec(selCols);
(dynamic_cast<GroupConcatColumn*>(ac))->orderCols(orderCols);
parm.reset(rowCol);
if (gc->str_separator())
{
string separator;
separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length());
(dynamic_cast<GroupConcatColumn*>(ac))->separator(separator);
}
}
else
{
for (uint32_t i = 0; i < isp->argument_count(); i++)
{
Item* sfitemp = sfitempp[i];
Item::Type sfitype = sfitemp->type();
switch (sfitype)
for (order_item = gc->get_order(),
end = order_item + gc->order_field(); order_item < end;
order_item++)
{
case Item::FIELD_ITEM:
{
Item_field* ifp = reinterpret_cast<Item_field*>(sfitemp);
SimpleColumn* sc = buildSimpleColumn(ifp, gwi);
Item* ord_col = *(*order_item)->item;
if (!sc)
if (ord_col->type() == Item::INT_ITEM)
{
Item_int* id = (Item_int*)ord_col;
if (id->val_int() > (int)selCols.size())
{
gwi.fatalParseError = true;
break;
if (ac)
delete ac;
return NULL;
}
parm.reset(sc);
gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm));
TABLE_LIST* tmp = (ifp->cached_table ? ifp->cached_table : 0);
gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp);
break;
rc = selCols[id->val_int() - 1]->clone();
rc->orderPos(id->val_int() - 1);
}
case Item::INT_ITEM:
case Item::STRING_ITEM:
case Item::REAL_ITEM:
case Item::DECIMAL_ITEM:
else
{
// treat as count(*)
if (ac->aggOp() == AggregateColumn::COUNT)
ac->aggOp(AggregateColumn::COUNT_ASTERISK);
rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError);
ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError)));
break;
}
case Item::NULL_ITEM:
{
//ac->aggOp(AggregateColumn::COUNT);
parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA));
//ac->functionParms(parm);
ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError)));
break;
}
case Item::FUNC_ITEM:
{
Item_func* ifp = (Item_func*)sfitemp;
ReturnedColumn* rc = 0;
// check count(1+1) case
vector <Item_field*> tmpVec;
uint16_t parseInfo = 0;
parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo);
if (parseInfo & SUB_BIT)
if (!rc || gwi.fatalParseError)
{
gwi.fatalParseError = true;
break;
}
else if (!gwi.fatalParseError &&
!(parseInfo & AGG_BIT) &&
!(parseInfo & AF_BIT) &&
tmpVec.size() == 0)
{
rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError);
FunctionColumn* fc = dynamic_cast<FunctionColumn*>(rc);
if ((fc && fc->functionParms().empty()) || !fc)
{
//ac->aggOp(AggregateColumn::COUNT_ASTERISK);
ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError);
if (dynamic_cast<ConstantColumn*>(rc))
{
//@bug5229. handle constant function on aggregate argument
ac->constCol(SRCP(rc));
break;
}
}
}
// MySQL carelessly allows correlated aggregate function on the WHERE clause.
// Here is the work around to deal with that inconsistence.
// e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1;
ClauseType clauseType = gwi.clauseType;
if (gwi.clauseType == WHERE)
gwi.clauseType = HAVING;
// @bug 3603. for cases like max(rand()). try to build function first.
if (!rc)
rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError);
parm.reset(rc);
gwi.clauseType = clauseType;
if (gwi.fatalParseError)
break;
//ac->functionParms(parm);
break;
}
case Item::REF_ITEM:
{
ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError);
if (rc)
{
parm.reset(rc);
//ac->functionParms(parm);
break;
if (ac)
delete ac;
return NULL;
}
}
default:
{
gwi.fatalParseError = true;
//gwi.parseErrorText = "Non-supported Item in Aggregate function";
}
// 10.2 TODO: direction is now a tri-state flag
rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false);
orderCols.push_back(SRCP(rc));
}
if (gwi.fatalParseError)
rowCol->columnVec(selCols);
(dynamic_cast<GroupConcatColumn*>(ac))->orderCols(orderCols);
parm.reset(rowCol);
if (gc->str_separator())
{
if (gwi.parseErrorText.empty())
{
Message::Args args;
if (item->name)
args.add(item->name);
else
args.add("");
gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args);
}
return NULL;
string separator;
separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length());
(dynamic_cast<GroupConcatColumn*>(ac))->separator(separator);
}
}
}
if (parm)
{
ac->functionParms(parm);
if (isp->sum_func() == Item_sum::AVG_FUNC ||
isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC)
{
CalpontSystemCatalog::ColType ct = parm->resultType();
switch (ct.colDataType)
{
case CalpontSystemCatalog::TINYINT:
case CalpontSystemCatalog::SMALLINT:
case CalpontSystemCatalog::MEDINT:
case CalpontSystemCatalog::INT:
case CalpontSystemCatalog::BIGINT:
case CalpontSystemCatalog::DECIMAL:
case CalpontSystemCatalog::UDECIMAL:
case CalpontSystemCatalog::UTINYINT:
case CalpontSystemCatalog::USMALLINT:
case CalpontSystemCatalog::UMEDINT:
case CalpontSystemCatalog::UINT:
case CalpontSystemCatalog::UBIGINT:
ct.colDataType = CalpontSystemCatalog::DECIMAL;
ct.colWidth = 8;
ct.scale += 4;
break;
#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM
case CalpontSystemCatalog::FLOAT:
case CalpontSystemCatalog::UFLOAT:
case CalpontSystemCatalog::DOUBLE:
case CalpontSystemCatalog::UDOUBLE:
ct.colDataType = CalpontSystemCatalog::DOUBLE;
ct.colWidth = 8;
break;
#endif
default:
break;
}
ac->resultType(ct);
}
else if (isp->sum_func() == Item_sum::COUNT_FUNC ||
isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC)
{
CalpontSystemCatalog::ColType ct;
ct.colDataType = CalpontSystemCatalog::BIGINT;
ct.colWidth = 8;
ct.scale = parm->resultType().scale;
ac->resultType(ct);
}
else if (isp->sum_func() == Item_sum::SUM_FUNC ||
isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC)
{
CalpontSystemCatalog::ColType ct = parm->resultType();
switch (ct.colDataType)
{
case CalpontSystemCatalog::TINYINT:
case CalpontSystemCatalog::SMALLINT:
case CalpontSystemCatalog::MEDINT:
case CalpontSystemCatalog::INT:
case CalpontSystemCatalog::BIGINT:
ct.colDataType = CalpontSystemCatalog::BIGINT;
// no break, let fall through
case CalpontSystemCatalog::DECIMAL:
case CalpontSystemCatalog::UDECIMAL:
ct.colWidth = 8;
break;
case CalpontSystemCatalog::UTINYINT:
case CalpontSystemCatalog::USMALLINT:
case CalpontSystemCatalog::UMEDINT:
case CalpontSystemCatalog::UINT:
case CalpontSystemCatalog::UBIGINT:
ct.colDataType = CalpontSystemCatalog::UBIGINT;
ct.colWidth = 8;
break;
#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM
case CalpontSystemCatalog::FLOAT:
case CalpontSystemCatalog::UFLOAT:
case CalpontSystemCatalog::DOUBLE:
case CalpontSystemCatalog::UDOUBLE:
ct.colDataType = CalpontSystemCatalog::DOUBLE;
ct.colWidth = 8;
break;
#endif
default:
break;
}
ac->resultType(ct);
}
else if (isp->sum_func() == Item_sum::STD_FUNC ||
isp->sum_func() == Item_sum::VARIANCE_FUNC)
{
CalpontSystemCatalog::ColType ct;
ct.colDataType = CalpontSystemCatalog::DOUBLE;
ct.colWidth = 8;
ct.scale = 0;
ac->resultType(ct);
}
else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC)
{
CalpontSystemCatalog::ColType ct;
ct.colDataType = CalpontSystemCatalog::BIGINT;
ct.colWidth = 8;
ct.scale = 0;
ct.precision = -16; // borrowed to indicate skip null value check on connector
ac->resultType(ct);
}
else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC)
{
//Item_func_group_concat* gc = (Item_func_group_concat*)isp;
CalpontSystemCatalog::ColType ct;
ct.colDataType = CalpontSystemCatalog::VARCHAR;
ct.colWidth = isp->max_length;
ct.precision = 0;
ac->resultType(ct);
}
else
{
ac->resultType(parm->resultType());
for (uint32_t i = 0; i < isp->argument_count(); i++)
{
Item* sfitemp = sfitempp[i];
Item::Type sfitype = sfitemp->type();
switch (sfitype)
{
case Item::FIELD_ITEM:
{
Item_field* ifp = reinterpret_cast<Item_field*>(sfitemp);
SimpleColumn* sc = buildSimpleColumn(ifp, gwi);
if (!sc)
{
gwi.fatalParseError = true;
break;
}
parm.reset(sc);
gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm));
TABLE_LIST* tmp = (ifp->cached_table ? ifp->cached_table : 0);
gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp);
break;
}
case Item::INT_ITEM:
case Item::STRING_ITEM:
case Item::REAL_ITEM:
case Item::DECIMAL_ITEM:
{
// treat as count(*)
if (ac->aggOp() == AggregateColumn::COUNT)
ac->aggOp(AggregateColumn::COUNT_ASTERISK);
ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError)));
break;
}
case Item::NULL_ITEM:
{
parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA));
ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError)));
break;
}
case Item::FUNC_ITEM:
{
Item_func* ifp = (Item_func*)sfitemp;
ReturnedColumn* rc = 0;
// check count(1+1) case
vector <Item_field*> tmpVec;
uint16_t parseInfo = 0;
parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo);
if (parseInfo & SUB_BIT)
{
gwi.fatalParseError = true;
break;
}
else if (!gwi.fatalParseError &&
!(parseInfo & AGG_BIT) &&
!(parseInfo & AF_BIT) &&
tmpVec.size() == 0)
{
rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError);
FunctionColumn* fc = dynamic_cast<FunctionColumn*>(rc);
if ((fc && fc->functionParms().empty()) || !fc)
{
//ac->aggOp(AggregateColumn::COUNT_ASTERISK);
ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError);
if (dynamic_cast<ConstantColumn*>(rc))
{
//@bug5229. handle constant function on aggregate argument
ac->constCol(SRCP(rc));
break;
}
}
}
// MySQL carelessly allows correlated aggregate function on the WHERE clause.
// Here is the work around to deal with that inconsistence.
// e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1;
ClauseType clauseType = gwi.clauseType;
if (gwi.clauseType == WHERE)
gwi.clauseType = HAVING;
// @bug 3603. for cases like max(rand()). try to build function first.
if (!rc)
rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError);
parm.reset(rc);
gwi.clauseType = clauseType;
if (gwi.fatalParseError)
break;
break;
}
case Item::REF_ITEM:
{
ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError);
if (rc)
{
parm.reset(rc);
break;
}
}
default:
{
gwi.fatalParseError = true;
//gwi.parseErrorText = "Non-supported Item in Aggregate function";
}
}
if (gwi.fatalParseError)
{
if (gwi.parseErrorText.empty())
{
Message::Args args;
if (item->name)
args.add(item->name);
else
args.add("");
gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args);
}
if (ac)
delete ac;
return NULL;
}
if (parm)
{
// MCOL-1201 multi-argument aggregate
ac->aggParms().push_back(parm);
}
}
}
}
else
{
ac->resultType(colType_MysqlToIDB(isp));
}
// adjust decimal result type according to internalDecimalScale
if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL)
{
CalpontSystemCatalog::ColType ct = ac->resultType();
ct.scale = gwi.internalDecimalScale;
ac->resultType(ct);
}
// check for same aggregate on the select list
ac->expressionId(ci->expressionId++);
if (gwi.clauseType != SELECT)
{
for (uint32_t i = 0; i < gwi.returnedCols.size(); i++)
// Get result type
// Modified for MCOL-1201 multi-argument aggregate
if (ac->aggParms().size() > 0)
{
if (*ac == gwi.returnedCols[i].get())
ac->expressionId(gwi.returnedCols[i]->expressionId());
}
}
// These are all one parm functions, so we can safely
// use the first parm for result type.
parm = ac->aggParms()[0];
if (isp->sum_func() == Item_sum::AVG_FUNC ||
isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC)
{
CalpontSystemCatalog::ColType ct = parm->resultType();
// @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will
// be applied in ExeMgr. When the ExeMgr fix is available, this checking
// will be taken out.
if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty())
switch (ct.colDataType)
{
case CalpontSystemCatalog::TINYINT:
case CalpontSystemCatalog::SMALLINT:
case CalpontSystemCatalog::MEDINT:
case CalpontSystemCatalog::INT:
case CalpontSystemCatalog::BIGINT:
case CalpontSystemCatalog::DECIMAL:
case CalpontSystemCatalog::UDECIMAL:
case CalpontSystemCatalog::UTINYINT:
case CalpontSystemCatalog::USMALLINT:
case CalpontSystemCatalog::UMEDINT:
case CalpontSystemCatalog::UINT:
case CalpontSystemCatalog::UBIGINT:
ct.colDataType = CalpontSystemCatalog::DECIMAL;
ct.colWidth = 8;
ct.scale += 4;
break;
#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM
case CalpontSystemCatalog::FLOAT:
case CalpontSystemCatalog::UFLOAT:
case CalpontSystemCatalog::DOUBLE:
case CalpontSystemCatalog::UDOUBLE:
ct.colDataType = CalpontSystemCatalog::DOUBLE;
ct.colWidth = 8;
break;
#endif
default:
break;
}
ac->resultType(ct);
}
else if (isp->sum_func() == Item_sum::COUNT_FUNC ||
isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC)
{
CalpontSystemCatalog::ColType ct;
ct.colDataType = CalpontSystemCatalog::BIGINT;
ct.colWidth = 8;
ct.scale = parm->resultType().scale;
ac->resultType(ct);
}
else if (isp->sum_func() == Item_sum::SUM_FUNC ||
isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC)
{
CalpontSystemCatalog::ColType ct = parm->resultType();
switch (ct.colDataType)
{
case CalpontSystemCatalog::TINYINT:
case CalpontSystemCatalog::SMALLINT:
case CalpontSystemCatalog::MEDINT:
case CalpontSystemCatalog::INT:
case CalpontSystemCatalog::BIGINT:
ct.colDataType = CalpontSystemCatalog::BIGINT;
// no break, let fall through
case CalpontSystemCatalog::DECIMAL:
case CalpontSystemCatalog::UDECIMAL:
ct.colWidth = 8;
break;
case CalpontSystemCatalog::UTINYINT:
case CalpontSystemCatalog::USMALLINT:
case CalpontSystemCatalog::UMEDINT:
case CalpontSystemCatalog::UINT:
case CalpontSystemCatalog::UBIGINT:
ct.colDataType = CalpontSystemCatalog::UBIGINT;
ct.colWidth = 8;
break;
#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM
case CalpontSystemCatalog::FLOAT:
case CalpontSystemCatalog::UFLOAT:
case CalpontSystemCatalog::DOUBLE:
case CalpontSystemCatalog::UDOUBLE:
ct.colDataType = CalpontSystemCatalog::DOUBLE;
ct.colWidth = 8;
break;
#endif
default:
break;
}
ac->resultType(ct);
}
else if (isp->sum_func() == Item_sum::STD_FUNC ||
isp->sum_func() == Item_sum::VARIANCE_FUNC)
{
CalpontSystemCatalog::ColType ct;
ct.colDataType = CalpontSystemCatalog::DOUBLE;
ct.colWidth = 8;
ct.scale = 0;
ac->resultType(ct);
}
else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC)
{
CalpontSystemCatalog::ColType ct;
ct.colDataType = CalpontSystemCatalog::BIGINT;
ct.colWidth = 8;
ct.scale = 0;
ct.precision = -16; // borrowed to indicate skip null value check on connector
ac->resultType(ct);
}
else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC)
{
//Item_func_group_concat* gc = (Item_func_group_concat*)isp;
CalpontSystemCatalog::ColType ct;
ct.colDataType = CalpontSystemCatalog::VARCHAR;
ct.colWidth = isp->max_length;
ct.precision = 0;
ac->resultType(ct);
}
else
{
// UDAF result type will be set below.
ac->resultType(parm->resultType());
}
}
else
{
ac->resultType(colType_MysqlToIDB(isp));
}
// adjust decimal result type according to internalDecimalScale
if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL)
{
CalpontSystemCatalog::ColType ct = ac->resultType();
ct.scale = gwi.internalDecimalScale;
ac->resultType(ct);
}
// check for same aggregate on the select list
ac->expressionId(ci->expressionId++);
if (gwi.clauseType != SELECT)
{
for (uint32_t i = 0; i < gwi.returnedCols.size(); i++)
{
if (*ac == gwi.returnedCols[i].get())
ac->expressionId(gwi.returnedCols[i]->expressionId());
}
}
// @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will
// be applied in ExeMgr. When the ExeMgr fix is available, this checking
// will be taken out.
if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty())
{
gwi.fatalParseError = true;
gwi.parseErrorText = "No project column found for aggregate function";
if (ac)
delete ac;
return NULL;
}
else if (ac->constCol())
{
gwi.count_asterisk_list.push_back(ac);
}
// For UDAF, populate the context and call the UDAF init() function.
// The return type is (should be) set in context by init().
if (isp->sum_func() == Item_sum::UDF_SUM_FUNC)
{
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(ac);
if (udafc)
{
mcsv1Context& context = udafc->getContext();
context.setName(isp->func_name());
// Set up the return type defaults for the call to init()
context.setResultType(udafc->resultType().colDataType);
context.setColWidth(udafc->resultType().colWidth);
context.setScale(udafc->resultType().scale);
context.setPrecision(udafc->resultType().precision);
context.setParamCount(udafc->aggParms().size());
ColumnDatum colType;
ColumnDatum colTypes[udafc->aggParms().size()];
// Build the column type vector.
// Modified for MCOL-1201 multi-argument aggregate
for (uint32_t i = 0; i < udafc->aggParms().size(); ++i)
{
const execplan::CalpontSystemCatalog::ColType& resultType
= udafc->aggParms()[i]->resultType();
colType.dataType = resultType.colDataType;
colType.precision = resultType.precision;
colType.scale = resultType.scale;
colTypes[i] = colType;
}
// Call the user supplied init()
mcsv1sdk::mcsv1_UDAF* udaf = context.getFunction();
if (!udaf)
{
gwi.fatalParseError = true;
gwi.parseErrorText = "Aggregate Function " + context.getName() + " doesn't exist in the ColumnStore engine";
if (ac)
delete ac;
return NULL;
}
if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR)
{
gwi.fatalParseError = true;
gwi.parseErrorText = udafc->getContext().getErrorMessage();
if (ac)
delete ac;
return NULL;
}
// UDAF_OVER_REQUIRED means that this function is for Window
// Function only. Reject it here in aggregate land.
if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED))
{
gwi.fatalParseError = true;
gwi.parseErrorText =
logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY,
context.getName());
if (ac)
delete ac;
return NULL;
}
// Set the return type as set in init()
CalpontSystemCatalog::ColType ct;
ct.colDataType = context.getResultType();
ct.colWidth = context.getColWidth();
ct.scale = context.getScale();
ct.precision = context.getPrecision();
udafc->resultType(ct);
}
}
}
catch (std::logic_error e)
{
gwi.fatalParseError = true;
gwi.parseErrorText = "No project column found for aggregate function";
gwi.parseErrorText = "error building Aggregate Function: ";
gwi.parseErrorText += e.what();
if (ac)
delete ac;
return NULL;
}
else if (ac->constCol())
catch (...)
{
gwi.count_asterisk_list.push_back(ac);
gwi.fatalParseError = true;
gwi.parseErrorText = "error building Aggregate Function: Unspecified exception";
if (ac)
delete ac;
return NULL;
}
// For UDAF, populate the context and call the UDAF init() function.
if (isp->sum_func() == Item_sum::UDF_SUM_FUNC)
{
UDAFColumn* udafc = dynamic_cast<UDAFColumn*>(ac);
if (udafc)
{
mcsv1Context& context = udafc->getContext();
context.setName(isp->func_name());
// Set up the return type defaults for the call to init()
context.setResultType(udafc->resultType().colDataType);
context.setColWidth(udafc->resultType().colWidth);
context.setScale(udafc->resultType().scale);
context.setPrecision(udafc->resultType().precision);
COL_TYPES colTypes;
execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter;
// Build the column type vector. For now, there is only one
colTypes.push_back(make_pair(udafc->functionParms()->alias(), udafc->functionParms()->resultType().colDataType));
// Call the user supplied init()
if (context.getFunction()->init(&context, colTypes) == mcsv1_UDAF::ERROR)
{
gwi.fatalParseError = true;
gwi.parseErrorText = udafc->getContext().getErrorMessage();
return NULL;
}
if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED))
{
gwi.fatalParseError = true;
gwi.parseErrorText =
logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY,
context.getName());
return NULL;
}
// Set the return type as set in init()
CalpontSystemCatalog::ColType ct;
ct.colDataType = context.getResultType();
ct.colWidth = context.getColWidth();
ct.scale = context.getScale();
ct.precision = context.getPrecision();
udafc->resultType(ct);
}
}
return ac;
}
@ -7839,7 +7909,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
return ER_CHECK_NOT_IMPLEMENTED;
}
(*coliter)->functionParms(minSc);
(*coliter)->aggParms().push_back(minSc);
}
std::vector<FunctionColumn*>::iterator funciter;
@ -9949,7 +10019,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro
return ER_CHECK_NOT_IMPLEMENTED;
}
(*coliter)->functionParms(minSc);
(*coliter)->aggParms().push_back(minSc);
}
std::vector<FunctionColumn*>::iterator funciter;

View File

@ -781,8 +781,11 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h
//double double_val = *(double*)(&value);
//f2->store(double_val);
if (f2->decimals() < (uint32_t)row.getScale(s))
f2->dec = (uint32_t)row.getScale(s);
if ((f2->decimals() == DECIMAL_NOT_SPECIFIED && row.getScale(s) > 0)
|| f2->decimals() < row.getScale(s))
{
f2->dec = row.getScale(s);
}
f2->store(dl);
@ -5275,8 +5278,6 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE
execplan::CalpontSelectExecutionPlan::ColumnMap::iterator colMapIter;
execplan::CalpontSelectExecutionPlan::ColumnMap::iterator condColMapIter;
execplan::ParseTree* ptIt;
execplan::ReturnedColumn* rcIt;
for (TABLE_LIST* tl = gi.groupByTables; tl; tl = tl->next_local)
{
mapiter = ci->tableMap.find(tl->table);

View File

@ -340,6 +340,7 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n
ac->distinct(item_sum->has_with_distinct());
Window_spec* win_spec = wf->window_spec;
SRCP srcp;
CalpontSystemCatalog::ColType ct; // For return type
// arguments
vector<SRCP> funcParms;
@ -370,18 +371,25 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n
context.setColWidth(rt.colWidth);
context.setScale(rt.scale);
context.setPrecision(rt.precision);
context.setParamCount(funcParms.size());
mcsv1sdk::ColumnDatum colType;
mcsv1sdk::ColumnDatum colTypes[funcParms.size()];
// Turn on the Analytic flag so the function is aware it is being called
// as a Window Function.
context.setContextFlag(CONTEXT_IS_ANALYTIC);
COL_TYPES colTypes;
execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter;
// Build the column type vector.
// Modified for MCOL-1201 multi-argument aggregate
for (size_t i = 0; i < funcParms.size(); ++i)
{
colTypes.push_back(make_pair(funcParms[i]->alias(), funcParms[i]->resultType().colDataType));
const execplan::CalpontSystemCatalog::ColType& resultType
= funcParms[i]->resultType();
colType.dataType = resultType.colDataType;
colType.precision = resultType.precision;
colType.scale = resultType.scale;
colTypes[i] = colType;
}
// Call the user supplied init()
@ -401,7 +409,6 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n
}
// Set the return type as set in init()
CalpontSystemCatalog::ColType ct;
ct.colDataType = context.getResultType();
ct.colWidth = context.getColWidth();
ct.scale = context.getScale();
@ -419,10 +426,10 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n
{
case Item_sum::UDF_SUM_FUNC:
{
uint64_t bIgnoreNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS));
char sIgnoreNulls[18];
sprintf(sIgnoreNulls, "%lu", bIgnoreNulls);
srcp.reset(new ConstantColumn(sIgnoreNulls, (uint64_t)bIgnoreNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 1 => RESPECT
uint64_t bRespectNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) ? 0 : 1;
char sRespectNulls[18];
sprintf(sRespectNulls, "%lu", bRespectNulls);
srcp.reset(new ConstantColumn(sRespectNulls, (uint64_t)bRespectNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 1 => RESPECT
funcParms.push_back(srcp);
break;
}
@ -881,11 +888,13 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n
return NULL;
}
ac->resultType(colType_MysqlToIDB(item_sum));
// bug5736. Make the result type double for some window functions when
// infinidb_double_for_decimal_math is set.
ac->adjustResultType();
if (item_sum->sum_func() != Item_sum::UDF_SUM_FUNC)
{
ac->resultType(colType_MysqlToIDB(item_sum));
// bug5736. Make the result type double for some window functions when
// infinidb_double_for_decimal_math is set.
ac->adjustResultType();
}
ac->expressionId(ci->expressionId++);

View File

@ -9,123 +9,142 @@
* http://www.boost.org/LICENSE_1_0.txt
*/
#include <stdint.h>
#include <stdexcept>
namespace static_any
{
namespace anyimpl
{
/// Exception type thrown by any::cast<T>() when T is not the stored type.
struct bad_any_cast
{
};

/// Tag type; its policy is the one installed in an `any` that holds no value.
struct empty_any
{
};

/// Type-erased operations on the single void* storage slot of an `any`.
/// Every function receives the ADDRESS of the slot.
struct base_any_policy
{
    /// Release whatever the slot owns.
    virtual void static_delete(void** x) = 0;
    /// Store a copy of the value pointed to by src into the slot dest.
    virtual void copy_from_value(void const* src, void** dest) = 0;
    /// Store a copy of the value held by slot src into slot dest.
    virtual void clone(void* const* src, void** dest) = 0;
    /// Overwrite the value held by slot dest with the value held by slot src.
    virtual void move(void* const* src, void** dest) = 0;
    /// Return a pointer to the value held by the slot.
    virtual void* get_value(void** src) = 0;
    /// Return sizeof the stored type.
    virtual size_t get_size() = 0;
};

/// Supplies get_size() for the concrete policies.
template<typename T>
struct typed_base_any_policy : base_any_policy
{
    virtual size_t get_size()
    {
        return sizeof(T);
    }
};

/// Policy for types stored directly inside the void* slot itself.
template<typename T>
struct small_any_policy : typed_base_any_policy<T>
{
    // Nothing is heap allocated, so there is nothing to free.
    virtual void static_delete(void** /*x*/)
    {
    }

    virtual void copy_from_value(void const* src, void** dest)
    {
        // Construct the value in place, on top of the slot.
        new (dest) T(*reinterpret_cast<T const*>(src));
    }

    virtual void clone(void* const* src, void** dest)
    {
        *dest = *src;
    }

    virtual void move(void* const* src, void** dest)
    {
        *dest = *src;
    }

    virtual void* get_value(void** src)
    {
        // The slot itself is the value.
        return reinterpret_cast<void*>(src);
    }
};

/// Policy for types stored on the heap; the slot holds a T*.
template<typename T>
struct big_any_policy : typed_base_any_policy<T>
{
    virtual void static_delete(void** x)
    {
        if (*x)
            delete (*reinterpret_cast<T**>(x));

        *x = NULL;
    }

    virtual void copy_from_value(void const* src, void** dest)
    {
        *dest = new T(*reinterpret_cast<T const*>(src));
    }

    virtual void clone(void* const* src, void** dest)
    {
        *dest = new T(**reinterpret_cast<T* const*>(src));
    }

    virtual void move(void* const* src, void** dest)
    {
        // Plain copy-assignment into the live destination object. The
        // destination must NOT be destroyed first: assigning to an object
        // whose destructor has already run is undefined behaviour (and a
        // double free for types such as std::string).
        // NOTE(review): like the previous code, this assumes *dest already
        // points at a live T - confirm against the callers.
        **reinterpret_cast<T**>(dest) = **reinterpret_cast<T* const*>(src);
    }

    virtual void* get_value(void** src)
    {
        return *src;
    }
};

/// Selects the storage policy for T; heap storage unless specialized below.
template<typename T>
struct choose_policy
{
    typedef big_any_policy<T> type;
};

/// Pointers always fit in the slot.
template<typename T>
struct choose_policy<T*>
{
    typedef small_any_policy<T*> type;
};

struct any;

/// Choosing the policy for an any type is illegal, but should never happen.
/// This is designed to throw a compiler error.
template<>
struct choose_policy<any>
{
    typedef void type;
};

/// Specializations for small types.
#define SMALL_POLICY(TYPE) template<> struct \
    choose_policy<TYPE> { typedef small_any_policy<TYPE> type; };

SMALL_POLICY(char);
SMALL_POLICY(signed char);
SMALL_POLICY(unsigned char);
SMALL_POLICY(signed short);
SMALL_POLICY(unsigned short);
SMALL_POLICY(signed int);
SMALL_POLICY(unsigned int);
SMALL_POLICY(signed long);
SMALL_POLICY(unsigned long);
SMALL_POLICY(signed long long);
SMALL_POLICY(unsigned long long);
SMALL_POLICY(float);
SMALL_POLICY(double);
SMALL_POLICY(bool);

#undef SMALL_POLICY

/// This function will return a different policy for each type.
template<typename T>
base_any_policy* get_policy()
{
    static typename choose_policy<T>::type policy;
    return &policy;
}
}
class any
@ -139,37 +158,40 @@ public:
/// Initializing constructor.
template <typename T>
any(const T& x)
: policy(anyimpl::get_policy<anyimpl::empty_any>()), object(NULL)
: policy(anyimpl::get_policy<anyimpl::empty_any>()), object(NULL)
{
assign(x);
}
/// Empty constructor.
any()
: policy(anyimpl::get_policy<anyimpl::empty_any>()), object(NULL)
{ }
: policy(anyimpl::get_policy<anyimpl::empty_any>()), object(NULL)
{
}
/// Special initializing constructor for string literals.
any(const char* x)
: policy(anyimpl::get_policy<anyimpl::empty_any>()), object(NULL)
{
: policy(anyimpl::get_policy<anyimpl::empty_any>()), object(NULL)
{
assign(x);
}
/// Copy constructor.
any(const any& x)
: policy(anyimpl::get_policy<anyimpl::empty_any>()), object(NULL)
{
: policy(anyimpl::get_policy<anyimpl::empty_any>()), object(NULL)
{
assign(x);
}
/// Destructor.
~any() {
~any()
{
policy->static_delete(&object);
}
/// Assignment function from another any.
any& assign(const any& x) {
any& assign(const any& x)
{
reset();
policy = x.policy;
policy->clone(&x.object, &object);
@ -178,7 +200,8 @@ public:
/// Assignment function.
template <typename T>
any& assign(const T& x) {
any& assign(const T& x)
{
reset();
policy = anyimpl::get_policy<T>();
policy->copy_from_value(&x, &object);
@ -197,8 +220,42 @@ public:
return assign(x);
}
/// Less than operator for sorting.
/// Two values are ordered only when they hold the same type (same policy);
/// the order is a byte-wise memcmp of the stored objects over sizeof(T).
/// Values of different types, or values with no stored object, are never
/// "less than" each other.
bool operator<(const any& x) const
{
    if (policy != x.policy)
        return false;

    // get_value() wants the address of a mutable slot; use local copies so
    // this const method does not need to cast constness away.
    void* p1 = object;
    void* p2 = x.object;
    const void* lhs = policy->get_value(&p1);
    const void* rhs = x.policy->get_value(&p2);

    // An empty any has no stored object; memcmp on NULL is undefined.
    if (lhs == NULL || rhs == NULL)
        return false;

    return memcmp(lhs, rhs, policy->get_size()) < 0;
}
/// Equal operator.
/// True when both values hold the same type (same policy) and their stored
/// objects are byte-wise identical over sizeof(T). Two values with no stored
/// object (both empty) compare equal.
bool operator==(const any& x) const
{
    if (policy != x.policy)
        return false;

    // get_value() wants the address of a mutable slot; use local copies so
    // this const method does not need to cast constness away.
    void* p1 = object;
    void* p2 = x.object;
    const void* lhs = policy->get_value(&p1);
    const void* rhs = x.policy->get_value(&p2);

    // An empty any has no stored object; memcmp on NULL is undefined.
    if (lhs == NULL || rhs == NULL)
        return lhs == rhs;

    return memcmp(lhs, rhs, policy->get_size()) == 0;
}
/// Utility functions
any& swap(any& x) {
/// Cheap hash built from the leading bytes of the stored value.
/// At most sizeof(T) bytes (capped at 8) are read, so the hash only depends
/// on bytes that operator== also compares. An empty any hashes to 0.
/// NOTE(review): the result of "% 4048" is truncated to uint8_t by the
/// return type; that is kept as-is so the interface does not change.
uint8_t getHash() const
{
    void* p1 = object;
    const void* value = policy->get_value(&p1);

    // No stored object (empty any): nothing to read.
    if (value == NULL)
        return 0;

    // Never read past the stored object: types smaller than 8 bytes would
    // otherwise pull in unrelated or stale bytes.
    uint64_t bits = 0;
    size_t len = policy->get_size();

    if (len > sizeof(bits))
        len = sizeof(bits);

    memcpy(&bits, value, len);
    return static_cast<uint8_t>(bits % 4048);
}
any& swap(any& x)
{
std::swap(policy, x.policy);
std::swap(object, x.object);
return *this;
@ -206,27 +263,32 @@ public:
/// Cast operator. You can only cast to the original type.
template<typename T>
T& cast() {
if (policy != anyimpl::get_policy<T>())
T& cast()
{
if (policy != anyimpl::get_policy<T>())
throw anyimpl::bad_any_cast();
T* r = reinterpret_cast<T*>(policy->get_value(&object));
return *r;
}
/// Returns true if the any contains no value.
bool empty() const {
bool empty() const
{
return policy == anyimpl::get_policy<anyimpl::empty_any>();
}
/// Frees any allocated memory, and sets the value to NULL.
void reset() {
void reset()
{
policy->static_delete(&object);
policy = anyimpl::get_policy<anyimpl::empty_any>();
}
/// Returns true if the two types are the same.
bool compatible(const any& x) const {
bool compatible(const any& x) const
{
return policy == x.policy;
}
};
}

View File

@ -215,6 +215,22 @@ inline string getStringNullValue()
namespace rowgroup
{
// Prototype `any` objects, one per C++ type. Each reference is bound to a
// temporary `any` constructed from a sample value of that type; the sample
// value itself is irrelevant. They are passed to any::compatible() to test
// which type a UDAF result currently holds.
// typeStr only supplies a std::string sample for strTypeId.
const std::string typeStr("");
const static_any::any& RowAggregation::charTypeId((char)1);
const static_any::any& RowAggregation::scharTypeId((signed char)1);
const static_any::any& RowAggregation::shortTypeId((short)1);
const static_any::any& RowAggregation::intTypeId((int)1);
const static_any::any& RowAggregation::longTypeId((long)1);
const static_any::any& RowAggregation::llTypeId((long long)1);
const static_any::any& RowAggregation::ucharTypeId((unsigned char)1);
const static_any::any& RowAggregation::ushortTypeId((unsigned short)1);
const static_any::any& RowAggregation::uintTypeId((unsigned int)1);
const static_any::any& RowAggregation::ulongTypeId((unsigned long)1);
const static_any::any& RowAggregation::ullTypeId((unsigned long long)1);
const static_any::any& RowAggregation::floatTypeId((float)1);
const static_any::any& RowAggregation::doubleTypeId((double)1);
const static_any::any& RowAggregation::strTypeId(typeStr);
KeyStorage::KeyStorage(const RowGroup& keys, Row** tRow) : tmpRow(tRow), rg(keys)
{
RGData data(rg);
@ -691,7 +707,8 @@ RowAggregation::RowAggregation(const vector<SP_ROWAGG_GRPBY_t>& rowAggGroupByCol
RowAggregation::RowAggregation(const RowAggregation& rhs):
fAggMapPtr(NULL), fRowGroupOut(NULL),
fTotalRowCount(0), fMaxTotalRowCount(AGG_ROWGROUP_SIZE),
fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0)
fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0),
fRGContext(rhs.fRGContext)
{
//fGroupByCols.clear();
//fFunctionCols.clear();
@ -756,7 +773,6 @@ void RowAggregation::addRowGroup(const RowGroup* pRows, vector<Row::Pointer>& in
{
// this function is for threaded aggregation, which is for group by and distinct.
// if (countSpecial(pRows))
Row rowIn;
pRows->initRow(&rowIn);
@ -790,7 +806,7 @@ void RowAggregation::setJoinRowGroups(vector<RowGroup>* pSmallSideRG, RowGroup*
}
//------------------------------------------------------------------------------
// For UDAF, we need to sometimes start a new context.
// For UDAF, we need to sometimes start a new fRGContext.
//
// This will be called any number of times by each of the batchprimitiveprocessor
// threads on the PM and by multiple threads on the UM. It must remain
@ -801,29 +817,29 @@ void RowAggregation::resetUDAF(uint64_t funcColID)
// Get the UDAF class pointer and store in the row definition object.
RowUDAFFunctionCol* rowUDAF = dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[funcColID].get());
// resetUDAF needs to be re-entrant. Since we're modifying the context object
// by creating a new userData, we need a local copy. The copy constructor
// doesn't copy userData.
mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext);
// RowAggregation and its functions need to be re-entrant which means
// each instance (thread) needs its own copy of the context object.
// Note: operator=() doesn't copy userData.
fRGContext = rowUDAF->fUDAFContext;
// Call the user reset for the group userData. Since, at this point,
// context's userData will be NULL, reset will generate a new one.
mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
rc = rgContext.getFunction()->reset(&rgContext);
rc = fRGContext.getFunction()->reset(&fRGContext);
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
{
rowUDAF->bInterrupted = true;
throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr);
throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr);
}
fRow.setUserDataStore(fRowGroupOut->getRGData()->getUserDataStore());
fRow.setUserData(rgContext,
rgContext.getUserDataSP(),
rgContext.getUserDataSize(),
fRow.setUserData(fRGContext,
fRGContext.getUserDataSP(),
fRGContext.getUserDataSize(),
rowUDAF->fAuxColumnIndex);
rgContext.setUserData(NULL); // Prevents calling deleteUserData on the context.
fRGContext.setUserData(NULL); // Prevents calling deleteUserData on the fRGContext.
}
//------------------------------------------------------------------------------
@ -873,7 +889,6 @@ void RowAggregation::initialize()
}
}
// Save the RowGroup data pointer
fResultDataVec.push_back(fRowGroupOut->getRGData());
@ -1658,10 +1673,11 @@ void RowAggregation::updateEntry(const Row& rowIn)
{
for (uint64_t i = 0; i < fFunctionCols.size(); i++)
{
int64_t colIn = fFunctionCols[i]->fInputColumnIndex;
int64_t colOut = fFunctionCols[i]->fOutputColumnIndex;
SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[i];
int64_t colIn = pFunctionCol->fInputColumnIndex;
int64_t colOut = pFunctionCol->fOutputColumnIndex;
switch (fFunctionCols[i]->fAggFunction)
switch (pFunctionCol->fAggFunction)
{
case ROWAGG_COUNT_COL_NAME:
@ -1675,7 +1691,7 @@ void RowAggregation::updateEntry(const Row& rowIn)
case ROWAGG_MIN:
case ROWAGG_MAX:
case ROWAGG_SUM:
doMinMaxSum(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction);
doMinMaxSum(rowIn, colIn, colOut, pFunctionCol->fAggFunction);
break;
case ROWAGG_AVG:
@ -1692,7 +1708,7 @@ void RowAggregation::updateEntry(const Row& rowIn)
case ROWAGG_BIT_OR:
case ROWAGG_BIT_XOR:
{
doBitOp(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction);
doBitOp(rowIn, colIn, colOut, pFunctionCol->fAggFunction);
break;
}
@ -1707,11 +1723,11 @@ void RowAggregation::updateEntry(const Row& rowIn)
case ROWAGG_UDAF:
{
RowUDAFFunctionCol* rowUDAF = dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[i].get());
RowUDAFFunctionCol* rowUDAF = dynamic_cast<RowUDAFFunctionCol*>(pFunctionCol.get());
if (rowUDAF)
{
doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF);
doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF, i);
}
else
{
@ -1725,7 +1741,7 @@ void RowAggregation::updateEntry(const Row& rowIn)
{
std::ostringstream errmsg;
errmsg << "RowAggregation: function (id = " <<
(uint64_t) fFunctionCols[i]->fAggFunction << ") is not supported.";
(uint64_t) pFunctionCol->fAggFunction << ") is not supported.";
cerr << errmsg.str() << endl;
throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr);
break;
@ -1997,131 +2013,142 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu
}
void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux,
RowUDAFFunctionCol* rowUDAF)
RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx)
{
std::vector<mcsv1sdk::ColumnDatum> valsIn;
execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupIn.getColTypes()[colIn];
std::vector<uint32_t> dataFlags;
int32_t paramCount = fRGContext.getParameterCount();
// The vector of parameters to be sent to the UDAF
mcsv1sdk::ColumnDatum valsIn[paramCount];
uint32_t dataFlags[paramCount];
// Get the context for this rowGroup. Make a copy so we're thread safe.
mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext);
// Turn on NULL flags
std::vector<uint32_t> flags;
uint32_t flag = 0;
if (isNull(&fRowGroupIn, rowIn, colIn) == true)
execplan::CalpontSystemCatalog::ColDataType colDataType;
for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i)
{
if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS))
mcsv1sdk::ColumnDatum& datum = valsIn[i];
// Turn on NULL flags
dataFlags[i] = 0;
if (isNull(&fRowGroupIn, rowIn, colIn) == true)
{
return;
if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS))
{
return;
}
dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL;
}
colDataType = fRowGroupIn.getColTypes()[colIn];
if (!fRGContext.isParamNull(i))
{
switch (colDataType)
{
case execplan::CalpontSystemCatalog::TINYINT:
case execplan::CalpontSystemCatalog::SMALLINT:
case execplan::CalpontSystemCatalog::MEDINT:
case execplan::CalpontSystemCatalog::INT:
case execplan::CalpontSystemCatalog::BIGINT:
case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL:
{
datum.dataType = execplan::CalpontSystemCatalog::BIGINT;
datum.columnData = rowIn.getIntField(colIn);
datum.scale = fRowGroupIn.getScale()[colIn];
datum.precision = fRowGroupIn.getPrecision()[colIn];
break;
}
case execplan::CalpontSystemCatalog::UTINYINT:
case execplan::CalpontSystemCatalog::USMALLINT:
case execplan::CalpontSystemCatalog::UMEDINT:
case execplan::CalpontSystemCatalog::UINT:
case execplan::CalpontSystemCatalog::UBIGINT:
{
datum.dataType = execplan::CalpontSystemCatalog::UBIGINT;
datum.columnData = rowIn.getUintField(colIn);
break;
}
case execplan::CalpontSystemCatalog::DOUBLE:
case execplan::CalpontSystemCatalog::UDOUBLE:
{
datum.dataType = execplan::CalpontSystemCatalog::DOUBLE;
datum.columnData = rowIn.getDoubleField(colIn);
break;
}
case execplan::CalpontSystemCatalog::FLOAT:
case execplan::CalpontSystemCatalog::UFLOAT:
{
datum.dataType = execplan::CalpontSystemCatalog::FLOAT;
datum.columnData = rowIn.getFloatField(colIn);
break;
}
case execplan::CalpontSystemCatalog::DATE:
case execplan::CalpontSystemCatalog::DATETIME:
{
datum.dataType = execplan::CalpontSystemCatalog::UBIGINT;
datum.columnData = rowIn.getUintField(colIn);
break;
}
case execplan::CalpontSystemCatalog::TIME:
{
datum.dataType = execplan::CalpontSystemCatalog::BIGINT;
datum.columnData = rowIn.getIntField(colIn);
break;
}
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::VARCHAR:
case execplan::CalpontSystemCatalog::TEXT:
case execplan::CalpontSystemCatalog::VARBINARY:
case execplan::CalpontSystemCatalog::CLOB:
case execplan::CalpontSystemCatalog::BLOB:
{
datum.dataType = colDataType;
datum.columnData = rowIn.getStringField(colIn);
break;
}
default:
{
std::ostringstream errmsg;
errmsg << "RowAggregation " << fRGContext.getName() <<
": No logic for data type: " << colDataType;
throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr);
break;
}
}
}
flag |= mcsv1sdk::PARAM_IS_NULL;
}
flags.push_back(flag);
rgContext.setDataFlags(&flags);
mcsv1sdk::ColumnDatum datum;
if (!rgContext.isParamNull(0))
{
switch (colDataType)
// MCOL-1201: If there are multiple parameters, the next fFunctionCols
// will have the column used. By incrementing the funcColsIdx (passed by
// ref, we also increment the caller's index.
if (fFunctionCols.size() > funcColsIdx + 1
&& fFunctionCols[funcColsIdx+1]->fAggFunction == ROWAGG_MULTI_PARM)
{
case execplan::CalpontSystemCatalog::TINYINT:
case execplan::CalpontSystemCatalog::SMALLINT:
case execplan::CalpontSystemCatalog::MEDINT:
case execplan::CalpontSystemCatalog::INT:
case execplan::CalpontSystemCatalog::BIGINT:
case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL:
{
datum.dataType = execplan::CalpontSystemCatalog::BIGINT;
datum.columnData = rowIn.getIntField(colIn);
datum.scale = fRowGroupIn.getScale()[colIn];
datum.precision = fRowGroupIn.getPrecision()[colIn];
break;
}
case execplan::CalpontSystemCatalog::UTINYINT:
case execplan::CalpontSystemCatalog::USMALLINT:
case execplan::CalpontSystemCatalog::UMEDINT:
case execplan::CalpontSystemCatalog::UINT:
case execplan::CalpontSystemCatalog::UBIGINT:
{
datum.dataType = execplan::CalpontSystemCatalog::UBIGINT;
datum.columnData = rowIn.getUintField(colIn);
break;
}
case execplan::CalpontSystemCatalog::DOUBLE:
case execplan::CalpontSystemCatalog::UDOUBLE:
{
datum.dataType = execplan::CalpontSystemCatalog::DOUBLE;
datum.columnData = rowIn.getDoubleField(colIn);
break;
}
case execplan::CalpontSystemCatalog::FLOAT:
case execplan::CalpontSystemCatalog::UFLOAT:
{
datum.dataType = execplan::CalpontSystemCatalog::FLOAT;
datum.columnData = rowIn.getFloatField(colIn);
break;
}
case execplan::CalpontSystemCatalog::DATE:
case execplan::CalpontSystemCatalog::DATETIME:
{
datum.dataType = execplan::CalpontSystemCatalog::UBIGINT;
datum.columnData = rowIn.getUintField(colIn);
break;
}
case execplan::CalpontSystemCatalog::TIME:
{
datum.dataType = execplan::CalpontSystemCatalog::BIGINT;
datum.columnData = rowIn.getIntField(colIn);
break;
}
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::VARCHAR:
case execplan::CalpontSystemCatalog::TEXT:
case execplan::CalpontSystemCatalog::VARBINARY:
case execplan::CalpontSystemCatalog::CLOB:
case execplan::CalpontSystemCatalog::BLOB:
{
datum.dataType = colDataType;
datum.columnData = rowIn.getStringField(colIn);
break;
}
default:
{
std::ostringstream errmsg;
errmsg << "RowAggregation " << rgContext.getName() <<
": No logic for data type: " << colDataType;
throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr);
break;
}
++funcColsIdx;
SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx];
colIn = pFunctionCol->fInputColumnIndex;
colOut = pFunctionCol->fOutputColumnIndex;
}
else
{
break;
}
}
valsIn.push_back(datum);
// The intermediate values are stored in userData referenced by colAux.
rgContext.setUserData(fRow.getUserData(colAux));
fRGContext.setDataFlags(dataFlags);
fRGContext.setUserData(fRow.getUserData(colAux));
mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
rc = rgContext.getFunction()->nextValue(&rgContext, valsIn);
rgContext.setUserData(NULL);
rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn);
fRGContext.setUserData(NULL);
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
{
rowUDAF->bInterrupted = true;
throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr);
throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr);
}
}
@ -2218,6 +2245,7 @@ RowAggregationUM::RowAggregationUM(const RowAggregationUM& rhs) :
fHasAvg(rhs.fHasAvg),
fKeyOnHeap(rhs.fKeyOnHeap),
fHasStatsFunc(rhs.fHasStatsFunc),
fHasUDAF(rhs.fHasUDAF),
fExpression(rhs.fExpression),
fTotalMemUsage(rhs.fTotalMemUsage),
fRm(rhs.fRm),
@ -2419,7 +2447,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn)
if (rowUDAF)
{
doUDAF(rowIn, colIn, colOut, colAux, rowUDAF);
doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i);
}
else
{
@ -2585,22 +2613,6 @@ void RowAggregationUM::calculateAvgColumns()
// Sets the value from valOut into column colOut, performing any conversions.
void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut)
{
static const static_any::any& charTypeId((char)1);
static const static_any::any& scharTypeId((signed char)1);
static const static_any::any& shortTypeId((short)1);
static const static_any::any& intTypeId((int)1);
static const static_any::any& longTypeId((long)1);
static const static_any::any& llTypeId((long long)1);
static const static_any::any& ucharTypeId((unsigned char)1);
static const static_any::any& ushortTypeId((unsigned short)1);
static const static_any::any& uintTypeId((unsigned int)1);
static const static_any::any& ulongTypeId((unsigned long)1);
static const static_any::any& ullTypeId((unsigned long long)1);
static const static_any::any& floatTypeId((float)1);
static const static_any::any& doubleTypeId((double)1);
static const std::string typeStr("");
static const static_any::any& strTypeId(typeStr);
execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut];
if (valOut.empty())
@ -2609,6 +2621,179 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut)
return;
}
int64_t intOut = 0;
uint64_t uintOut = 0;
float floatOut = 0.0;
double doubleOut = 0.0;
ostringstream oss;
std::string strOut;
bool bSetSuccess = false;
switch (colDataType)
{
case execplan::CalpontSystemCatalog::BIT:
case execplan::CalpontSystemCatalog::TINYINT:
if (valOut.compatible(charTypeId))
{
intOut = valOut.cast<char>();
bSetSuccess = true;
}
else if (valOut.compatible(scharTypeId))
{
intOut = valOut.cast<signed char>();
bSetSuccess = true;
}
if (bSetSuccess)
{
fRow.setIntField<1>(intOut, colOut);
}
break;
case execplan::CalpontSystemCatalog::SMALLINT:
case execplan::CalpontSystemCatalog::MEDINT:
if (valOut.compatible(shortTypeId))
{
intOut = valOut.cast<short>();
fRow.setIntField<2>(intOut, colOut);
bSetSuccess = true;
}
break;
case execplan::CalpontSystemCatalog::INT:
// Fast path for an INT output column: take the value directly when
// evaluate() returned an int or a long, then store it as a 4-byte field.
// The int branch must be tested against intTypeId. Testing uintTypeId
// (as this arm previously did) meant a plain int result never matched
// here, while an unsigned int result matched and was then read back
// with cast<int>(), i.e. with a mismatched type.
if (valOut.compatible(intTypeId))
{
intOut = valOut.cast<int>();
bSetSuccess = true;
}
else if (valOut.compatible(longTypeId))
{
intOut = valOut.cast<long>();
bSetSuccess = true;
}
// Any other held type leaves bSetSuccess false, so the generic
// SetUDAFAnyValue() call after the switch performs the conversion.
if (bSetSuccess)
{
fRow.setIntField<4>(intOut, colOut);
}
break;
case execplan::CalpontSystemCatalog::BIGINT:
case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL:
if (valOut.compatible(llTypeId))
{
intOut = valOut.cast<long long>();
fRow.setIntField<8>(intOut, colOut);
bSetSuccess = true;
}
break;
case execplan::CalpontSystemCatalog::UTINYINT:
if (valOut.compatible(ucharTypeId))
{
uintOut = valOut.cast<unsigned char>();
fRow.setUintField<1>(uintOut, colOut);
bSetSuccess = true;
}
break;
case execplan::CalpontSystemCatalog::USMALLINT:
case execplan::CalpontSystemCatalog::UMEDINT:
if (valOut.compatible(ushortTypeId))
{
uintOut = valOut.cast<unsigned short>();
fRow.setUintField<2>(uintOut, colOut);
bSetSuccess = true;
}
break;
case execplan::CalpontSystemCatalog::UINT:
if (valOut.compatible(uintTypeId))
{
uintOut = valOut.cast<unsigned int>();
fRow.setUintField<4>(uintOut, colOut);
bSetSuccess = true;
}
break;
case execplan::CalpontSystemCatalog::UBIGINT:
if (valOut.compatible(ulongTypeId))
{
uintOut = valOut.cast<unsigned long>();
fRow.setUintField<8>(uintOut, colOut);
bSetSuccess = true;
}
break;
case execplan::CalpontSystemCatalog::DATE:
case execplan::CalpontSystemCatalog::DATETIME:
if (valOut.compatible(ulongTypeId))
{
uintOut = valOut.cast<unsigned long>();
fRow.setUintField<8>(uintOut, colOut);
bSetSuccess = true;
}
break;
case execplan::CalpontSystemCatalog::FLOAT:
case execplan::CalpontSystemCatalog::UFLOAT:
if (valOut.compatible(floatTypeId))
{
floatOut = valOut.cast<float>();
fRow.setFloatField(floatOut, colOut);
bSetSuccess = true;
}
break;
case execplan::CalpontSystemCatalog::DOUBLE:
case execplan::CalpontSystemCatalog::UDOUBLE:
if (valOut.compatible(doubleTypeId))
{
doubleOut = valOut.cast<double>();
fRow.setDoubleField(doubleOut, colOut);
bSetSuccess = true;
}
break;
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::VARCHAR:
case execplan::CalpontSystemCatalog::TEXT:
if (valOut.compatible(strTypeId))
{
std::string strOut = valOut.cast<std::string>();
fRow.setStringField(strOut, colOut);
bSetSuccess = true;
}
break;
case execplan::CalpontSystemCatalog::VARBINARY:
case execplan::CalpontSystemCatalog::CLOB:
case execplan::CalpontSystemCatalog::BLOB:
if (valOut.compatible(strTypeId))
{
std::string strOut = valOut.cast<std::string>();
fRow.setVarBinaryField(strOut, colOut);
bSetSuccess = true;
}
break;
default:
{
std::ostringstream errmsg;
errmsg << "RowAggregation: No logic for data type: " << colDataType;
throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr);
break;
}
}
if (!bSetSuccess)
{
SetUDAFAnyValue(valOut, colOut);
}
}
void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut)
{
execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut];
// This may seem a bit convoluted. Users shouldn't return a type
// that they didn't set in mcsv1_UDAF::init(), but this
// handles whatever return type is given and casts
@ -2814,7 +2999,7 @@ void RowAggregationUM::calculateUDAFColumns()
continue;
rowUDAF = dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[i].get());
mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext);
fRGContext = rowUDAF->fUDAFContext;
int64_t colOut = rowUDAF->fOutputColumnIndex;
int64_t colAux = rowUDAF->fAuxColumnIndex;
@ -2826,26 +3011,26 @@ void RowAggregationUM::calculateUDAFColumns()
fRowGroupOut->getRow(j, &fRow);
// Turn the NULL flag off. We can't know NULL at this point
rgContext.setDataFlags(NULL);
fRGContext.setDataFlags(NULL);
// The intermediate values are stored in colAux.
rgContext.setUserData(fRow.getUserData(colAux));
fRGContext.setUserData(fRow.getUserData(colAux));
// Call the UDAF evaluate function
mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
rc = rgContext.getFunction()->evaluate(&rgContext, valOut);
rgContext.setUserData(NULL);
rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut);
fRGContext.setUserData(NULL);
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
{
rowUDAF->bInterrupted = true;
throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr);
throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr);
}
// Set the returned value into the output row
SetUDAFValue(valOut, colOut);
}
rgContext.setUserData(NULL);
fRGContext.setUserData(NULL);
}
}
@ -3116,54 +3301,60 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u
{
// For a NULL constant, call nextValue with NULL and then evaluate.
bool bInterrupted = false;
mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext);
context.setInterrupted(bInterrupted);
context.createUserData();
fRGContext.setInterrupted(bInterrupted);
fRGContext.createUserData();
mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
std::vector<mcsv1sdk::ColumnDatum> valsIn;
mcsv1sdk::ColumnDatum valsIn[1];
// Call a reset, then nextValue, then execute. This will evaluate
// the UDAF for the constant.
rc = context.getFunction()->reset(&context);
rc = fRGContext.getFunction()->reset(&fRGContext);
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
{
context.setInterrupted(true);
throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr);
fRGContext.setInterrupted(true);
throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr);
}
#if 0
uint32_t dataFlags[fRGContext.getParameterCount()];
for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i)
{
mcsv1sdk::ColumnDatum& datum = valsIn[i];
// Turn on NULL flags
dataFlags[i] = 0;
}
#endif
// Turn the NULL and CONSTANT flags on.
std::vector<uint32_t> flags;
uint32_t flag = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT;
flags.push_back(flag);
context.setDataFlags(&flags);
uint32_t flags[1];
flags[0] = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT;
fRGContext.setDataFlags(flags);
// Create a dummy datum
mcsv1sdk::ColumnDatum datum;
mcsv1sdk::ColumnDatum& datum = valsIn[0];
datum.dataType = execplan::CalpontSystemCatalog::BIGINT;
datum.columnData = 0;
valsIn.push_back(datum);
rc = context.getFunction()->nextValue(&context, valsIn);
rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn);
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
{
context.setInterrupted(true);
throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr);
fRGContext.setInterrupted(true);
throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr);
}
static_any::any valOut;
rc = context.getFunction()->evaluate(&context, valOut);
rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut);
fRGContext.setUserData(NULL);
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
{
context.setInterrupted(true);
throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr);
fRGContext.setInterrupted(true);
throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr);
}
// Set the returned value into the output row
SetUDAFValue(valOut, colOut);
context.setDataFlags(NULL);
fRGContext.setDataFlags(NULL);
}
break;
@ -3460,30 +3651,28 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData
case ROWAGG_UDAF:
{
bool bInterrupted = false;
mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext);
context.setInterrupted(bInterrupted);
context.createUserData();
fRGContext.setInterrupted(bInterrupted);
fRGContext.createUserData();
mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
std::vector<mcsv1sdk::ColumnDatum> valsIn;
mcsv1sdk::ColumnDatum valsIn[1];
// Call a reset, then nextValue, then execute. This will evaluate
// the UDAF for the constant.
rc = context.getFunction()->reset(&context);
rc = fRGContext.getFunction()->reset(&fRGContext);
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
{
context.setInterrupted(true);
throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr);
fRGContext.setInterrupted(true);
throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr);
}
// Turn the CONSTANT flags on.
std::vector<uint32_t> flags;
uint32_t flag = mcsv1sdk::PARAM_IS_CONSTANT;
flags.push_back(flag);
context.setDataFlags(&flags);
uint32_t flags[1];
flags[0] = mcsv1sdk::PARAM_IS_CONSTANT;
fRGContext.setDataFlags(flags);
// Create a datum item for sending to UDAF
mcsv1sdk::ColumnDatum datum;
mcsv1sdk::ColumnDatum& datum = valsIn[0];
datum.dataType = (CalpontSystemCatalog::ColDataType)colDataType;
switch (colDataType)
@ -3567,27 +3756,27 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData
break;
}
valsIn.push_back(datum);
rc = context.getFunction()->nextValue(&context, valsIn);
rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn);
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
{
context.setInterrupted(true);
throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr);
fRGContext.setInterrupted(true);
throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr);
}
static_any::any valOut;
rc = context.getFunction()->evaluate(&context, valOut);
rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut);
fRGContext.setUserData(NULL);
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
{
context.setInterrupted(true);
throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr);
fRGContext.setInterrupted(true);
throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr);
}
// Set the returned value into the output row
SetUDAFValue(valOut, colOut);
context.setDataFlags(NULL);
fRGContext.setDataFlags(NULL);
}
break;
@ -3806,7 +3995,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn)
if (rowUDAF)
{
doUDAF(rowIn, colIn, colOut, colAux, rowUDAF);
doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i);
}
else
{
@ -4011,45 +4200,43 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut
// rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance
//------------------------------------------------------------------------------
void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux,
RowUDAFFunctionCol* rowUDAF)
RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx)
{
static_any::any valOut;
mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext);
// Get the user data
boost::shared_ptr<mcsv1sdk::UserData> userData = rowIn.getUserData(colIn + 1);
// Unlike other aggregates, the data isn't in colIn, so testing it for NULL
// there won't help. In case of NULL, userData will be NULL.
std::vector<uint32_t> flags;
uint32_t flag = 0;
uint32_t flags[1];
flags[0] = 0;
if (!userData)
{
if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS))
if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS))
{
return;
}
// Turn on NULL flags
flag |= mcsv1sdk::PARAM_IS_NULL;
flags[0] |= mcsv1sdk::PARAM_IS_NULL;
}
flags.push_back(flag);
rgContext.setDataFlags(&flags);
fRGContext.setDataFlags(flags);
// The intermediate values are stored in colAux.
rgContext.setUserData(fRow.getUserData(colAux));
fRGContext.setUserData(fRow.getUserData(colAux));
// Call the UDAF subEvaluate method
mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
rc = rgContext.getFunction()->subEvaluate(&rgContext, userData.get());
rgContext.setUserData(NULL);
rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userData.get());
fRGContext.setUserData(NULL);
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
{
rowUDAF->bInterrupted = true;
throw logging::IDBExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr);
throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr);
}
}
@ -4246,7 +4433,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn)
if (rowUDAF)
{
doUDAF(rowIn, colIn, colOut, colAux, rowUDAF);
doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i);
}
else
{

View File

@ -110,6 +110,9 @@ enum RowAggFunctionType
// User Defined Aggregate Function
ROWAGG_UDAF,
// If an Aggregate has more than one parameter, this will be used for parameters after the first
ROWAGG_MULTI_PARM,
// internal function type to avoid duplicating the work
// handling ROWAGG_COUNT_NO_OP, ROWAGG_DUP_FUNCT and ROWAGG_DUP_AVG is a little different
// ROWAGG_COUNT_NO_OP : count done by AVG, no need to copy
@ -583,7 +586,7 @@ protected:
virtual void doAvg(const Row&, int64_t, int64_t, int64_t);
virtual void doStatistics(const Row&, int64_t, int64_t, int64_t);
virtual void doBitOp(const Row&, int64_t, int64_t, int);
virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF);
virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx);
virtual bool countSpecial(const RowGroup* pRG)
{
fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0);
@ -660,6 +663,25 @@ protected:
//need access to rowgroup storage holding the rows to hash & ==.
friend class AggHasher;
friend class AggComparator;
// We need a separate copy for each thread.
mcsv1sdk::mcsv1Context fRGContext;
// These are handy for testing the actual type of static_any for UDAF
static const static_any::any& charTypeId;
static const static_any::any& scharTypeId;
static const static_any::any& shortTypeId;
static const static_any::any& intTypeId;
static const static_any::any& longTypeId;
static const static_any::any& llTypeId;
static const static_any::any& ucharTypeId;
static const static_any::any& ushortTypeId;
static const static_any::any& uintTypeId;
static const static_any::any& ulongTypeId;
static const static_any::any& ullTypeId;
static const static_any::any& floatTypeId;
static const static_any::any& doubleTypeId;
static const static_any::any& strTypeId;
};
//------------------------------------------------------------------------------
@ -783,6 +805,9 @@ protected:
// Sets the value from valOut into column colOut, performing any conversions.
void SetUDAFValue(static_any::any& valOut, int64_t colOut);
// If the datatype returned by evaluate isn't what we expect, convert.
void SetUDAFAnyValue(static_any::any& valOut, int64_t colOut);
// calculate the UDAF function all rows received. UM only function.
void calculateUDAFColumns();
@ -877,7 +902,7 @@ protected:
void doStatistics(const Row&, int64_t, int64_t, int64_t);
void doGroupConcat(const Row&, int64_t, int64_t);
void doBitOp(const Row&, int64_t, int64_t, int);
void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF);
void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx);
bool countSpecial(const RowGroup* pRG)
{
return false;

View File

@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES}
########### next target ###############
set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp)
set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp)
add_definitions(-DMYSQL_DYNAMIC_PLUGIN)

View File

@ -27,11 +27,11 @@ struct allnull_data
#define OUT_TYPE int64_t
mcsv1_UDAF::ReturnCode allnull::init(mcsv1Context* context,
COL_TYPES& colTypes)
ColumnDatum* colTypes)
{
context->setUserDataSize(sizeof(allnull_data));
if (colTypes.size() < 1)
if (context->getParameterCount() < 1)
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
@ -52,8 +52,7 @@ mcsv1_UDAF::ReturnCode allnull::reset(mcsv1Context* context)
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsIn)
mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
struct allnull_data* data = (struct allnull_data*)context->getUserData()->data;

View File

@ -103,7 +103,7 @@ public:
* colTypes or wrong number of arguments. Else return
* mcsv1_UDAF::SUCCESS.
*/
virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes);
virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes);
/**
* reset()
@ -138,7 +138,7 @@ public:
*
* valsIn (in) - an array of the parameters from the row.
*/
virtual ReturnCode nextValue(mcsv1Context* context, std::vector<ColumnDatum>& valsIn);
virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
/**
* subEvaluate()

View File

@ -25,9 +25,9 @@
using namespace mcsv1sdk;
mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context,
COL_TYPES& colTypes)
ColumnDatum* colTypes)
{
if (colTypes.size() < 1)
if (context->getParameterCount() < 1)
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context,
return mcsv1_UDAF::ERROR;
}
if (colTypes.size() > 1)
if (context->getParameterCount() > 1)
{
context->setErrorMessage("avg_mode() with more than 1 argument");
return mcsv1_UDAF::ERROR;
}
if (!(isNumeric(colTypes[0].second)))
if (!(isNumeric(colTypes[0].dataType)))
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode avg_mode::reset(mcsv1Context* context)
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsIn)
mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
static_any::any& valIn = valsIn[0].columnData;
MODE_DATA& data = static_cast<ModeData*>(context->getUserData())->mData;
@ -187,8 +186,7 @@ mcsv1_UDAF::ReturnCode avg_mode::evaluate(mcsv1Context* context, static_any::any
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsDropped)
mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
static_any::any& valIn = valsDropped[0].columnData;
MODE_DATA& data = static_cast<ModeData*>(context->getUserData())->mData;

View File

@ -18,7 +18,7 @@
/***********************************************************************
* $Id$
*
* mcsv1_UDAF.h
* avg_mode.h
***********************************************************************/
/**
@ -50,8 +50,8 @@
* is also used to describe the interface that is used for
* either.
*/
#ifndef HEADER_mode
#define HEADER_mode
#ifndef HEADER_avg_mode
#define HEADER_avg_mode
#include <cstdlib>
#include <string>
@ -134,7 +134,7 @@ public:
* mcsv1_UDAF::SUCCESS.
*/
virtual ReturnCode init(mcsv1Context* context,
COL_TYPES& colTypes);
ColumnDatum* colTypes);
/**
* reset()
@ -169,8 +169,7 @@ public:
*
* valsIn (in) - an array of the parameters from the row.
*/
virtual ReturnCode nextValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsIn);
virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
/**
* subEvaluate()
@ -246,8 +245,7 @@ public:
* dropValue() will not be called for unbounded/current row type
* frames, as those are already optimized.
*/
virtual ReturnCode dropValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsDropped);
virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
/**
* createUserData()

View File

@ -36,6 +36,8 @@ UDAF_MAP UDAFMap::fm;
#include "ssq.h"
#include "median.h"
#include "avg_mode.h"
#include "regr_avgx.h"
#include "avgx.h"
UDAF_MAP& UDAFMap::getMap()
{
if (fm.size() > 0)
@ -52,6 +54,8 @@ UDAF_MAP& UDAFMap::getMap()
fm["ssq"] = new ssq();
fm["median"] = new median();
fm["avg_mode"] = new avg_mode();
fm["regr_avgx"] = new regr_avgx();
fm["avgx"] = new avgx();
return fm;
}
@ -115,8 +119,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const
{
// We don't test the per row data fields. They don't determine
// if it's the same Context.
if (getName() != c.getName()
|| fRunFlags != c.fRunFlags
if (getName() != c.getName()
||fRunFlags != c.fRunFlags
|| fContextFlags != c.fContextFlags
|| fUserDataSize != c.fUserDataSize
|| fResultType != c.fResultType
@ -125,7 +129,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const
|| fStartFrame != c.fStartFrame
|| fEndFrame != c.fEndFrame
|| fStartConstant != c.fStartConstant
|| fEndConstant != c.fEndConstant)
|| fEndConstant != c.fEndConstant
|| fParamCount != c.fParamCount)
return false;
return true;
@ -217,6 +222,7 @@ void mcsv1Context::serialize(messageqcpp::ByteStream& b) const
b << (uint32_t)fEndFrame;
b << fStartConstant;
b << fEndConstant;
b << fParamCount;
}
void mcsv1Context::unserialize(messageqcpp::ByteStream& b)
@ -238,6 +244,7 @@ void mcsv1Context::unserialize(messageqcpp::ByteStream& b)
fEndFrame = (WF_FRAME)frame;
b >> fStartConstant;
b >> fEndConstant;
b >> fParamCount;
}
void UserData::serialize(messageqcpp::ByteStream& bs) const

View File

@ -77,6 +77,7 @@
#include "any.hpp"
#include "calpontsystemcatalog.h"
#include "wf_frame.h"
#include "my_decimal_limits.h"
using namespace execplan;
@ -200,12 +201,8 @@ static uint64_t CONTEXT_IS_PM __attribute__ ((unused)) = 1 << 2;
// Flags that describe the contents of a specific input parameter
// These will be set in context->dataFlags for each method call by the framework.
// User code shouldn't use these directly
static uint64_t PARAM_IS_NULL __attribute__ ((unused)) = 1;
static uint64_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1;
// shorthand for the list of columns in the call sent to init()
// first is the actual column name and second is the data type in Columnstore.
typedef std::vector<std::pair<std::string, CalpontSystemCatalog::ColDataType> >COL_TYPES;
static uint32_t PARAM_IS_NULL __attribute__ ((unused)) = 1;
static uint32_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1;
// This is the context class that is passed to all API callbacks
// The framework potentially sets data here for each invocation of
@ -269,7 +266,9 @@ public:
EXPORT bool isPM();
// Parameter refinement description accessors
// valid in nextValue and dropValue
// How many actual parameters were entered.
// valid in all calls
size_t getParameterCount() const;
// Determine if an input parameter is NULL
@ -298,6 +297,7 @@ public:
// This only makes sense if the return type is decimal, but should be set
// to (0, -1) for other types if the input is decimal.
// valid in init()
// Set the scale to DECIMAL_NOT_SPECIFIED if you want a floating decimal.
EXPORT bool setScale(int32_t scale);
EXPORT bool setPrecision(int32_t precision);
@ -372,7 +372,7 @@ private:
int32_t fResultscale; // For scale, the number of digits to the right of the decimal
int32_t fResultPrecision; // The max number of digits allowed in the decimal value
std::string errorMsg;
std::vector<uint32_t>* dataFlags; // one entry for each parameter
uint32_t* dataFlags; // an integer array with one entry for each parameter
bool* bInterrupted; // Gets set to true by the Framework if something happens
WF_FRAME fStartFrame; // Is set to default to start, then modified by the actual frame in the call
WF_FRAME fEndFrame; // Is set to default to start, then modified by the actual frame in the call
@ -380,6 +380,7 @@ private:
int32_t fEndConstant; // for end frame WF_PRECEDING or WF_FOLLOWING
std::string functionName;
mcsv1sdk::mcsv1_UDAF* func;
int32_t fParamCount;
public:
// For use by the framework
@ -394,13 +395,14 @@ public:
EXPORT void clearContextFlag(uint64_t flag);
EXPORT uint64_t getContextFlags() const;
EXPORT uint32_t getUserDataSize() const;
EXPORT std::vector<uint32_t>& getDataFlags();
EXPORT void setDataFlags(std::vector<uint32_t>* flags);
EXPORT uint32_t* getDataFlags();
EXPORT void setDataFlags(uint32_t* flags);
EXPORT void setInterrupted(bool interrupted);
EXPORT void setInterrupted(bool* interrupted);
EXPORT mcsv1sdk::mcsv1_UDAF* getFunction();
EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const;
EXPORT boost::shared_ptr<UserData> getUserDataSP();
EXPORT void setParamCount(int32_t paramCount);
};
// Since aggregate functions can operate on any data type, we use the following structure
@ -419,9 +421,10 @@ public:
struct ColumnDatum
{
CalpontSystemCatalog::ColDataType dataType; // defined in calpontsystemcatalog.h
static_any::any columnData;
static_any::any columnData; // Not valid in init()
uint32_t scale; // If dataType is a DECIMAL type
uint32_t precision; // If dataType is a DECIMAL type
std::string alias; // Only filled in for init()
ColumnDatum() : dataType(CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1) {};
};
@ -466,7 +469,7 @@ public:
* mcsv1_UDAF::SUCCESS.
*/
virtual ReturnCode init(mcsv1Context* context,
COL_TYPES& colTypes) = 0;
ColumnDatum* colTypes) = 0;
/**
* reset()
@ -501,8 +504,7 @@ public:
*
* valsIn (in) - an array of the parameters from the row.
*/
virtual ReturnCode nextValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsIn) = 0;
virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn) = 0;
/**
* subEvaluate()
@ -579,8 +581,7 @@ public:
* dropValue() will not be called for unbounded/current row type
* frames, as those are already optimized.
*/
virtual ReturnCode dropValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsDropped);
virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
/**
* createUserData()
@ -640,32 +641,32 @@ inline mcsv1Context::mcsv1Context() :
fEndFrame(WF_CURRENT_ROW),
fStartConstant(0),
fEndConstant(0),
func(NULL)
func(NULL),
fParamCount(0)
{
}
inline mcsv1Context::mcsv1Context(const mcsv1Context& rhs) :
fContextFlags(0),
fColWidth(0),
dataFlags(NULL),
bInterrupted(NULL),
func(NULL)
dataFlags(NULL)
{
copy(rhs);
}
inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs)
{
fRunFlags = rhs.getRunFlags();
fResultType = rhs.getResultType();
fUserDataSize = rhs.getUserDataSize();
fResultscale = rhs.getScale();
fResultPrecision = rhs.getPrecision();
fRunFlags = rhs.fRunFlags;
fContextFlags = rhs.fContextFlags;
fResultType = rhs.fResultType;
fUserDataSize = rhs.fUserDataSize;
fColWidth = rhs.fColWidth;
fResultscale = rhs.fResultscale;
fResultPrecision = rhs.fResultPrecision;
rhs.getStartFrame(fStartFrame, fStartConstant);
rhs.getEndFrame(fEndFrame, fEndConstant);
functionName = rhs.getName();
bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference
func = rhs.func;
functionName = rhs.functionName;
bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference
func = rhs.func;
fParamCount = rhs.fParamCount;
return *this;
}
@ -675,11 +676,7 @@ inline mcsv1Context::~mcsv1Context()
inline mcsv1Context& mcsv1Context::operator=(const mcsv1Context& rhs)
{
fContextFlags = 0;
fColWidth = 0;
dataFlags = NULL;
bInterrupted = NULL;
func = NULL;
return copy(rhs);
}
@ -753,16 +750,13 @@ inline bool mcsv1Context::isPM()
inline size_t mcsv1Context::getParameterCount() const
{
if (dataFlags)
return dataFlags->size();
return 0;
return fParamCount;
}
inline bool mcsv1Context::isParamNull(int paramIdx)
{
if (dataFlags)
return (*dataFlags)[paramIdx] & PARAM_IS_NULL;
return dataFlags[paramIdx] & PARAM_IS_NULL;
return false;
}
@ -770,7 +764,7 @@ inline bool mcsv1Context::isParamNull(int paramIdx)
inline bool mcsv1Context::isParamConstant(int paramIdx)
{
if (dataFlags)
return (*dataFlags)[paramIdx] & PARAM_IS_CONSTANT;
return dataFlags[paramIdx] & PARAM_IS_CONSTANT;
return false;
}
@ -939,18 +933,22 @@ inline uint32_t mcsv1Context::getUserDataSize() const
return fUserDataSize;
}
inline std::vector<uint32_t>& mcsv1Context::getDataFlags()
inline uint32_t* mcsv1Context::getDataFlags()
{
return *dataFlags;
return dataFlags;
}
inline void mcsv1Context::setDataFlags(std::vector<uint32_t>* flags)
inline void mcsv1Context::setDataFlags(uint32_t* flags)
{
dataFlags = flags;
}
inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsDropped)
// Framework-side setter: records the number of parameters for this UDAF
// in fParamCount, which is the value getParameterCount() reports.
inline void mcsv1Context::setParamCount(int32_t paramCount)
{
fParamCount = paramCount;
}
// Default dropValue(): ignores both arguments and returns NOT_IMPLEMENTED.
// The method is virtual and not pure, so a UDAF may override it
// (avg_mode, median and ssq each provide their own).
inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
return NOT_IMPLEMENTED;
}

View File

@ -25,9 +25,9 @@
using namespace mcsv1sdk;
mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context,
COL_TYPES& colTypes)
ColumnDatum* colTypes)
{
if (colTypes.size() < 1)
if (context->getParameterCount() < 1)
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context,
return mcsv1_UDAF::ERROR;
}
if (colTypes.size() > 1)
if (context->getParameterCount() > 1)
{
context->setErrorMessage("median() with more than 1 argument");
return mcsv1_UDAF::ERROR;
}
if (!(isNumeric(colTypes[0].second)))
if (!(isNumeric(colTypes[0].dataType)))
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode median::reset(mcsv1Context* context)
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsIn)
mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
static_any::any& valIn = valsIn[0].columnData;
MEDIAN_DATA& data = static_cast<MedianData*>(context->getUserData())->mData;
@ -212,8 +211,7 @@ mcsv1_UDAF::ReturnCode median::evaluate(mcsv1Context* context, static_any::any&
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsDropped)
mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
static_any::any& valIn = valsDropped[0].columnData;
MEDIAN_DATA& data = static_cast<MedianData*>(context->getUserData())->mData;

View File

@ -134,7 +134,7 @@ public:
* mcsv1_UDAF::SUCCESS.
*/
virtual ReturnCode init(mcsv1Context* context,
COL_TYPES& colTypes);
ColumnDatum* colTypes);
/**
* reset()
@ -169,8 +169,7 @@ public:
*
* valsIn (in) - an array of the parameters from the row.
*/
virtual ReturnCode nextValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsIn);
virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
/**
* subEvaluate()
@ -246,8 +245,7 @@ public:
* dropValue() will not be called for unbounded/current row type
* frames, as those are already optimized.
*/
virtual ReturnCode dropValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsDropped);
virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
/**
* createUserData()

View File

@ -34,9 +34,9 @@ struct ssq_data
#define OUT_TYPE int64_t
mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context,
COL_TYPES& colTypes)
ColumnDatum* colTypes)
{
if (colTypes.size() < 1)
if (context->getParameterCount() < 1)
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
@ -44,13 +44,13 @@ mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context,
return mcsv1_UDAF::ERROR;
}
if (colTypes.size() > 1)
if (context->getParameterCount() > 1)
{
context->setErrorMessage("ssq() with more than 1 argument");
return mcsv1_UDAF::ERROR;
}
if (!(isNumeric(colTypes[0].second)))
if (!(isNumeric(colTypes[0].dataType)))
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
@ -81,8 +81,7 @@ mcsv1_UDAF::ReturnCode ssq::reset(mcsv1Context* context)
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsIn)
mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
static_any::any& valIn = valsIn[0].columnData;
struct ssq_data* data = (struct ssq_data*)context->getUserData()->data;
@ -183,8 +182,7 @@ mcsv1_UDAF::ReturnCode ssq::evaluate(mcsv1Context* context, static_any::any& val
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsDropped)
mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
static_any::any& valIn = valsDropped[0].columnData;
struct ssq_data* data = (struct ssq_data*)context->getUserData()->data;

View File

@ -114,7 +114,7 @@ public:
* mcsv1_UDAF::SUCCESS.
*/
virtual ReturnCode init(mcsv1Context* context,
COL_TYPES& colTypes);
ColumnDatum* colTypes);
/**
* reset()
@ -147,8 +147,7 @@ public:
*
* valsIn (in) - an array of the parameters from the row.
*/
virtual ReturnCode nextValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsIn);
virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
/**
* subEvaluate()
@ -224,8 +223,7 @@ public:
* dropValue() will not be called for unbounded/current row type
* frames, as those are already optimized.
*/
virtual ReturnCode dropValue(mcsv1Context* context,
std::vector<ColumnDatum>& valsDropped);
virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
protected:
};

View File

@ -490,6 +490,168 @@ extern "C"
// return data->sumsq;
return 0;
}
//=======================================================================
/**
* regr_avgx connector stub
*/
// Per-invocation accumulator for the regr_avgx connector stub.
// An instance is malloc'd in regr_avgx_init(), stored in UDF_INIT::ptr,
// zeroed again by regr_avgx_clear() and released by regr_avgx_deinit().
struct regr_avgx_data
{
// Running sum of the values added by regr_avgx_add().
double sumx;
// Number of values added by regr_avgx_add().
int64_t cnt;
};
#ifdef _MSC_VER
__declspec(dllexport)
#endif
/**
 * regr_avgx_init()
 *
 * Validates the argument count and allocates the zeroed accumulator that
 * the other regr_avgx_* entry points reach through initid->ptr.
 *
 * Returns 0 on success, or 1 with a description copied into message.
 */
my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
    // Exactly two arguments are accepted.
    if (args->arg_count != 2)
    {
        strcpy(message,"regr_avgx() requires two arguments");
        return 1;
    }

    struct regr_avgx_data* state =
        (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data));

    if (state == NULL)
    {
        strmov(message,"Couldn't allocate memory");
        return 1;
    }

    // Start from an empty accumulation and hand ownership to the UDF handle.
    state->cnt = 0;
    state->sumx = 0;
    initid->ptr = (char*)state;
    return 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
/**
 * regr_avgx_deinit()
 *
 * Releases the accumulator that regr_avgx_init() stored in initid->ptr.
 */
void regr_avgx_deinit(UDF_INIT* initid)
{
    void* state = initid->ptr;
    free(state);
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
/**
 * regr_avgx_clear()
 *
 * Resets the accumulator held in initid->ptr to an empty state
 * (zero sum, zero count).
 */
void
regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
                char* message __attribute__((unused)))
{
    struct regr_avgx_data* state = reinterpret_cast<struct regr_avgx_data*>(initid->ptr);
    state->cnt = 0;
    state->sumx = 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
/**
 * regr_avgx_add()
 *
 * Folds one input row into the accumulator held in initid->ptr.
 *
 * The averaged value is the second argument (index 1). Its type tag and its
 * value pointer are now taken from the same index; the previous code mixed
 * the type of argument 1 with the value of argument 0.
 *
 * Rows where either argument is NULL (a NULL pointer in args->args) are
 * skipped so that the converter is never handed a NULL pointer.
 */
void
regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args,
              char* is_null,
              char* message __attribute__((unused)))
{
    // A NULL SQL value is passed as a NULL pointer; ignore incomplete pairs.
    if (args->args[0] == NULL || args->args[1] == NULL)
    {
        return;
    }

    struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr;
    double xval = cvtArgToDouble(args->arg_type[1], args->args[1]);
    ++data->cnt;
    data->sumx += xval;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
/**
 * regr_avgx()
 *
 * Returns the accumulated sum divided by the accumulated count, converted
 * to the declared long long return type.
 *
 * When no rows were accumulated the result is reported as SQL NULL through
 * is_null instead of dividing by zero (converting the resulting NaN to an
 * integer is undefined behaviour).
 */
long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
                    char* is_null, char* error __attribute__((unused)))
{
    struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr;

    if (data->cnt == 0)
    {
        *is_null = 1;
        return 0;
    }

    return data->sumx / data->cnt;
}
//=======================================================================
/**
* avgx connector stub. Exactly the same functionality as the
* built-in avg() function. Used to test the performance of the
* API.
*/
// Per-invocation accumulator for the avgx connector stub.
// An instance is malloc'd in avgx_init(), stored in UDF_INIT::ptr,
// zeroed again by avgx_clear() and released by avgx_deinit().
struct avgx_data
{
// Running sum of the values added by avgx_add().
double sumx;
// Number of values added by avgx_add().
int64_t cnt;
};
#ifdef _MSC_VER
__declspec(dllexport)
#endif
/**
 * avgx_init()
 *
 * Validates the argument count and allocates the zeroed accumulator that
 * the other avgx_* entry points reach through initid->ptr.
 *
 * Returns 0 on success, or 1 with a description copied into message.
 */
my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
    // Exactly one argument is accepted.
    if (args->arg_count != 1)
    {
        strcpy(message,"avgx() requires one argument");
        return 1;
    }

    struct avgx_data* state =
        (struct avgx_data*) malloc(sizeof(struct avgx_data));

    if (state == NULL)
    {
        strmov(message,"Couldn't allocate memory");
        return 1;
    }

    // Start from an empty accumulation and hand ownership to the UDF handle.
    state->cnt = 0;
    state->sumx = 0;
    initid->ptr = (char*)state;
    return 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
/**
 * avgx_deinit()
 *
 * Releases the accumulator that avgx_init() stored in initid->ptr.
 */
void avgx_deinit(UDF_INIT* initid)
{
    void* state = initid->ptr;
    free(state);
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
/**
 * avgx_clear()
 *
 * Resets the accumulator held in initid->ptr to an empty state
 * (zero sum, zero count).
 */
void
avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
           char* message __attribute__((unused)))
{
    struct avgx_data* state = reinterpret_cast<struct avgx_data*>(initid->ptr);
    state->cnt = 0;
    state->sumx = 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
/**
 * avgx_add()
 *
 * Folds one input row into the accumulator held in initid->ptr.
 *
 * avgx_init() enforces exactly one argument, so both the type tag and the
 * value pointer must come from index 0; the previous code read
 * arg_type[1], which is past the end of the argument arrays.
 *
 * A NULL argument (a NULL pointer in args->args) is skipped so that the
 * converter is never handed a NULL pointer.
 */
void
avgx_add(UDF_INIT* initid, UDF_ARGS* args,
         char* is_null,
         char* message __attribute__((unused)))
{
    // A NULL SQL value is passed as a NULL pointer; it contributes nothing.
    if (args->args[0] == NULL)
    {
        return;
    }

    struct avgx_data* data = (struct avgx_data*)initid->ptr;
    double xval = cvtArgToDouble(args->arg_type[0], args->args[0]);
    ++data->cnt;
    data->sumx += xval;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
/**
 * avgx()
 *
 * Returns the accumulated sum divided by the accumulated count, converted
 * to the declared long long return type.
 *
 * When no rows were accumulated the result is reported as SQL NULL through
 * is_null instead of dividing by zero (converting the resulting NaN to an
 * integer is undefined behaviour).
 */
long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
               char* is_null, char* error __attribute__((unused)))
{
    struct avgx_data* data = (struct avgx_data*)initid->ptr;

    if (data->cnt == 0)
    {
        *is_null = 1;
        return 0;
    }

    return data->sumx / data->cnt;
}
}
// vim:ts=4 sw=4:

View File

@ -204,8 +204,10 @@
Filters="*.c;*.C;*.cc;*.cpp;*.cp;*.cxx;*.c++;*.prg;*.pas;*.dpr;*.asm;*.s;*.bas;*.java;*.cs;*.sc;*.e;*.cob;*.html;*.rc;*.tcl;*.py;*.pl;*.d">
<F N="allnull.cpp"/>
<F N="avg_mode.cpp"/>
<F N="avgx.cpp"/>
<F N="mcsv1_udaf.cpp"/>
<F N="median.cpp"/>
<F N="regr_avgx.cpp"/>
<F N="ssq.cpp"/>
<F N="udfmysql.cpp"/>
<F N="udfsdk.cpp"/>
@ -215,8 +217,10 @@
Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if">
<F N="allnull.h"/>
<F N="avg_mode.h"/>
<F N="avgx.h"/>
<F N="mcsv1_udaf.h"/>
<F N="median.h"/>
<F N="regr_avgx.h"/>
<F N="ssq.h"/>
<F N="udfsdk.h"/>
</Folder>

View File

@ -52,6 +52,7 @@ using namespace joblist;
namespace windowfunction
{
template<typename T>
boost::shared_ptr<WindowFunctionType> WF_udaf<T>::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context)
{
@ -142,7 +143,7 @@ template<typename T>
void WF_udaf<T>::resetData()
{
getContext().getFunction()->reset(&getContext());
fSet.clear();
fDistinctSet.clear();
WindowFunctionType::resetData();
}
@ -150,8 +151,8 @@ template<typename T>
void WF_udaf<T>::parseParms(const std::vector<execplan::SRCP>& parms)
{
bRespectNulls = true;
// parms[1]: respect null | ignore null
ConstantColumn* cc = dynamic_cast<ConstantColumn*>(parms[1].get());
// The last parms: respect null | ignore null
ConstantColumn* cc = dynamic_cast<ConstantColumn*>(parms[parms.size()-1].get());
idbassert(cc != NULL);
bool isNull = false; // dummy, harded coded
bRespectNulls = (cc->getIntVal(fRow, isNull) > 0);
@ -167,52 +168,71 @@ bool WF_udaf<T>::dropValues(int64_t b, int64_t e)
}
mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
uint64_t colOut = fFieldIndex[0];
uint64_t colIn = fFieldIndex[1];
mcsv1sdk::ColumnDatum datum;
datum.dataType = fRow.getColType(colIn);
datum.scale = fRow.getScale(colIn);
datum.precision = fRow.getPrecision(colOut);
// Turn on the Analytic flag so the function is aware it is being called
// as a Window Function.
getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC);
// Put the parameter metadata (type, scale, precision) into valsIn
mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()];
for (uint32_t i = 0; i < getContext().getParameterCount(); ++i)
{
uint64_t colIn = fFieldIndex[i+1];
mcsv1sdk::ColumnDatum& datum = valsIn[i];
datum.dataType = fRow.getColType(colIn);
datum.scale = fRow.getScale(colIn);
datum.precision = fRow.getPrecision(colIn);
}
for (int64_t i = b; i < e; i++)
{
if (i % 1000 == 0 && fStep->cancelled())
break;
bool bHasNull = false;
fRow.setData(getPointer(fRowData->at(i)));
// Turn on NULL flags
std::vector<uint32_t> flags;
uint32_t flag = 0;
uint32_t flags[getContext().getParameterCount()];
if (fRow.isNullValue(colIn) == true)
for (uint32_t k = 0; k < getContext().getParameterCount(); ++k)
{
if (!bRespectNulls)
uint64_t colIn = fFieldIndex[k+1];
mcsv1sdk::ColumnDatum& datum = valsIn[k];
flags[k] = 0;
if (fRow.isNullValue(colIn) == true)
{
continue;
if (!bRespectNulls)
{
bHasNull = true;
break;
}
flags[k] |= mcsv1sdk::PARAM_IS_NULL;
}
flag |= mcsv1sdk::PARAM_IS_NULL;
T valIn;
getValue(colIn, valIn, &datum.dataType);
// Check for distinct, if turned on.
// Currently, distinct only works for param 1
if (k == 0)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
{
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
}
flags.push_back(flag);
getContext().setDataFlags(&flags);
T valIn;
getValue(colIn, valIn, &datum.dataType);
// Check for distinct, if turned on.
// TODO: when we impliment distinct, we need to revist this.
if ((fDistinct) || (fSet.find(valIn) != fSet.end()))
if (bHasNull)
{
continue;
}
datum.columnData = valIn;
std::vector<mcsv1sdk::ColumnDatum> valsIn;
valsIn.push_back(datum);
rc = getContext().getFunction()->dropValue(&getContext(), valsIn);
if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED)
@ -442,59 +462,191 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
else if (fPrev <= e && fPrev > c)
e = c;
uint64_t colIn = fFieldIndex[1];
// Turn on the Analytic flag so the function is aware it is being called
// as a Window Function.
getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC);
mcsv1sdk::ColumnDatum datum;
datum.dataType = fRow.getColType(colIn);
datum.scale = fRow.getScale(colIn);
datum.precision = fRow.getPrecision(colOut);
// Put the parameter metadata (type, scale, precision) into valsIn
mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()];
for (uint32_t i = 0; i < getContext().getParameterCount(); ++i)
{
uint64_t colIn = fFieldIndex[i+1];
mcsv1sdk::ColumnDatum& datum = valsIn[i];
datum.dataType = fRow.getColType(colIn);
datum.scale = fRow.getScale(colIn);
datum.precision = fRow.getPrecision(colIn);
}
if (b <= c && c <= e)
getContext().setContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW);
else
getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW);
bool bHasNull = false;
for (int64_t i = b; i <= e; i++)
{
if (i % 1000 == 0 && fStep->cancelled())
break;
fRow.setData(getPointer(fRowData->at(i)));
// Turn on NULL flags
std::vector<uint32_t> flags;
uint32_t flag = 0;
if (fRow.isNullValue(colIn) == true)
// NULL flags
uint32_t flags[getContext().getParameterCount()];
for (uint32_t k = 0; k < getContext().getParameterCount(); ++k)
{
if (!bRespectNulls)
uint64_t colIn = fFieldIndex[k+1];
mcsv1sdk::ColumnDatum& datum = valsIn[k];
// Turn on Null flags or skip based on respect nulls
flags[k] = 0;
if (fRow.isNullValue(colIn) == true)
{
if (!bRespectNulls)
{
bHasNull = true;
break;
}
flags[k] |= mcsv1sdk::PARAM_IS_NULL;
}
// MCOL-1201 Multi-Parameter calls
switch (datum.dataType)
{
case CalpontSystemCatalog::TINYINT:
case CalpontSystemCatalog::SMALLINT:
case CalpontSystemCatalog::MEDINT:
case CalpontSystemCatalog::INT:
case CalpontSystemCatalog::BIGINT:
case CalpontSystemCatalog::DECIMAL:
{
int64_t valIn;
getValue(colIn, valIn);
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
{
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
break;
}
case CalpontSystemCatalog::UTINYINT:
case CalpontSystemCatalog::USMALLINT:
case CalpontSystemCatalog::UMEDINT:
case CalpontSystemCatalog::UINT:
case CalpontSystemCatalog::UBIGINT:
case CalpontSystemCatalog::UDECIMAL:
{
uint64_t valIn;
getValue(colIn, valIn);
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
{
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
break;
}
case CalpontSystemCatalog::DOUBLE:
case CalpontSystemCatalog::UDOUBLE:
{
double valIn;
getValue(colIn, valIn);
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
{
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
break;
}
case CalpontSystemCatalog::FLOAT:
case CalpontSystemCatalog::UFLOAT:
{
float valIn;
getValue(colIn, valIn);
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
{
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
break;
}
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::VARBINARY:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::BLOB:
{
string valIn;
getValue(colIn, valIn);
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
{
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
break;
}
default:
{
string errStr = "(" + colType2String[i] + ")";
errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr);
cerr << errStr << endl;
throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE);
break;
}
}
// Skip if any value is NULL and respect nulls is off.
if (bHasNull)
{
continue;
}
flag |= mcsv1sdk::PARAM_IS_NULL;
}
flags.push_back(flag);
getContext().setDataFlags(&flags);
T valIn;
getValue(colIn, valIn, &datum.dataType);
// Check for distinct, if turned on.
if ((fDistinct) || (fSet.find(valIn) != fSet.end()))
{
continue;
}
if (fDistinct)
fSet.insert(valIn);
datum.columnData = valIn;
std::vector<mcsv1sdk::ColumnDatum> valsIn;
valsIn.push_back(datum);
getContext().setDataFlags(flags);
rc = getContext().getFunction()->nextValue(&getContext(), valsIn);
if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)

View File

@ -21,13 +21,35 @@
#ifndef UTILS_WF_UDAF_H
#define UTILS_WF_UDAF_H
#include <set>
#ifndef _MSC_VER
#include <tr1/unordered_set>
#else
#include <unordered_set>
#endif
#include "windowfunctiontype.h"
#include "mcsv1_udaf.h"
namespace windowfunction
{
// Hash classes for the distinct hashmap
class DistinctHasher
{
public:
inline size_t operator()(const static_any::any& a) const
{
return a.getHash();
}
};
// Equality functor for the distinct-value set: two entries are the same
// key when static_any::any's operator== says so.
// Both operands are taken by const reference; a hash container compares
// its stored (const) keys, so the predicate must not require a mutable
// argument.
class DistinctEqual
{
public:
    inline bool operator()(const static_any::any& lhs, const static_any::any& rhs) const
    {
        return lhs == rhs;
    }
};
// A class to control the execution of User Defined Analytic Functions (UDAnF)
// as defined by a specialization of mcsv1sdk::mcsv1_UDAF
@ -72,7 +94,8 @@ protected:
bool fDistinct;
bool bRespectNulls; // respect null | ignore null
bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue.
std::set<T> fSet; // To hold distinct values
// To hold distinct values
std::tr1::unordered_set<static_any::any, DistinctHasher, DistinctEqual> fDistinctSet;
static_any::any fValOut; // The return value
public:

View File

@ -492,10 +492,10 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos)
static uint64_t dateNull = joblist::DATENULL;
static uint64_t datetimeNull = joblist::DATETIMENULL;
static uint64_t timeNull = joblist::TIMENULL;
static uint64_t char1Null = joblist::CHAR1NULL;
static uint64_t char2Null = joblist::CHAR2NULL;
static uint64_t char4Null = joblist::CHAR4NULL;
static uint64_t char8Null = joblist::CHAR8NULL;
// static uint64_t char1Null = joblist::CHAR1NULL;
// static uint64_t char2Null = joblist::CHAR2NULL;
// static uint64_t char4Null = joblist::CHAR4NULL;
// static uint64_t char8Null = joblist::CHAR8NULL;
static string stringNull("");
void* v = NULL;

View File

@ -1280,7 +1280,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid,
((totalRow - rowsLeft) > 0) &&
(rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK))
{
for (unsigned k = 1; k < colStructList.size(); k++)
for (size_t k = 1; k < colStructList.size(); k++)
{
Column expandCol;
colOp = m_colOp[op(colStructList[k].fCompressionType)];
@ -2025,10 +2025,10 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid,
((totalRow - rowsLeft) > 0) &&
(rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK))
{
for (unsigned k = 0; k < colStructList.size(); k++)
for (size_t k = 0; k < colStructList.size(); k++)
{
// Skip the selected column
if (k == colId)
if (k == (size_t)colId)
continue;
Column expandCol;
@ -2583,7 +2583,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid,
((totalRow - rowsLeft) > 0) &&
(rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK))
{
for (unsigned k = 1; k < colStructList.size(); k++)
for (size_t k = 1; k < colStructList.size(); k++)
{
Column expandCol;
colOp = m_colOp[op(colStructList[k].fCompressionType)];
@ -3278,7 +3278,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid,
((totalRow - rowsLeft) > 0) &&
(rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK))
{
for (unsigned k = 1; k < colStructList.size(); k++)
for (size_t k = 1; k < colStructList.size(); k++)
{
Column expandCol;
colOp = m_colOp[op(colStructList[k].fCompressionType)];