From 82e8ab7518ea869e37cc1d724ce570164316a6ba Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:50:10 -0500 Subject: [PATCH 01/19] MCOL-1201 manual rebase with develop. Obsoletes branch MCOL-1201 --- dbcon/execplan/aggregatecolumn.cpp | 96 +-- dbcon/execplan/aggregatecolumn.h | 44 +- dbcon/joblist/expressionstep.cpp | 12 +- dbcon/joblist/expressionstep.h | 1 + dbcon/joblist/groupconcat.cpp | 2 +- dbcon/joblist/joblistfactory.cpp | 531 ++++++++---- dbcon/joblist/tupleaggregatestep.cpp | 280 +++++-- dbcon/mysql/ha_calpont_execplan.cpp | 858 +++++++++++--------- dbcon/mysql/ha_calpont_impl.cpp | 8 +- dbcon/mysql/ha_window_function.cpp | 37 +- utils/common/any.hpp | 270 +++--- utils/rowgroup/rowaggregation.cpp | 605 +++++++++----- utils/rowgroup/rowaggregation.h | 29 +- utils/udfsdk/CMakeLists.txt | 2 +- utils/udfsdk/allnull.cpp | 7 +- utils/udfsdk/allnull.h | 4 +- utils/udfsdk/avg_mode.cpp | 14 +- utils/udfsdk/avg_mode.h | 14 +- utils/udfsdk/mcsv1_udaf.cpp | 13 +- utils/udfsdk/mcsv1_udaf.h | 88 +- utils/udfsdk/median.cpp | 14 +- utils/udfsdk/median.h | 8 +- utils/udfsdk/ssq.cpp | 14 +- utils/udfsdk/ssq.h | 8 +- utils/udfsdk/udfmysql.cpp | 162 ++++ utils/udfsdk/udfsdk.vpj | 4 + utils/windowfunction/wf_udaf.cpp | 280 +++++-- utils/windowfunction/wf_udaf.h | 27 +- utils/windowfunction/windowfunctiontype.cpp | 8 +- writeengine/wrapper/writeengine.cpp | 10 +- 30 files changed, 2255 insertions(+), 1195 deletions(-) diff --git a/dbcon/execplan/aggregatecolumn.cpp b/dbcon/execplan/aggregatecolumn.cpp index 18cba2607..5bce12d79 100644 --- a/dbcon/execplan/aggregatecolumn.cpp +++ b/dbcon/execplan/aggregatecolumn.cpp @@ -98,36 +98,6 @@ AggregateColumn::AggregateColumn(const uint32_t sessionID): { } -AggregateColumn::AggregateColumn(const AggOp aggOp, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - 
-AggregateColumn::AggregateColumn(const AggOp aggOp, const string& content, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + content + ")") -{ - // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); -} - -// deprecated constructor. use function name as string -AggregateColumn::AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fFunctionName(functionName), - fAggOp(NOOP), - fAsc(false), - fData(functionName + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - // deprecated constructor. use function name as string AggregateColumn::AggregateColumn(const string& functionName, const string& content, const uint32_t sessionID): ReturnedColumn(sessionID), @@ -137,20 +107,21 @@ AggregateColumn::AggregateColumn(const string& functionName, const string& conte fData(functionName + "(" + content + ")") { // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); + SRCP srcp(new ArithmeticColumn(content)); + fAggParms.push_back(srcp); } AggregateColumn::AggregateColumn( const AggregateColumn& rhs, const uint32_t sessionID ): ReturnedColumn(rhs, sessionID), fFunctionName (rhs.fFunctionName), fAggOp(rhs.fAggOp), - fFunctionParms(rhs.fFunctionParms), fTableAlias(rhs.tableAlias()), fAsc(rhs.asc()), fData(rhs.data()), fConstCol(rhs.fConstCol) { fAlias = rhs.alias(); + fAggParms = rhs.fAggParms; } /** @@ -166,10 +137,14 @@ const string AggregateColumn::toString() const if (fAlias.length() > 0) output << "/Alias: " << fAlias << endl; - if (fFunctionParms == 0) - output << "No arguments" << endl; + if (fAggParms.size() == 0) + output << "No arguments"; else - output << *fFunctionParms << endl; + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + output << *(fAggParms[i]) << " "; + } + output << endl; if (fConstCol) output << *fConstCol; @@ -191,10 +166,11 @@ void 
AggregateColumn::serialize(messageqcpp::ByteStream& b) const b << fFunctionName; b << static_cast(fAggOp); - if (fFunctionParms == 0) - b << (uint8_t) ObjectReader::NULL_CLASS; - else - fFunctionParms->serialize(b); + b << static_cast(fAggParms.size()); + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + fAggParms[i]->serialize(b); + } b << static_cast(fGroupByColList.size()); @@ -219,20 +195,26 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const void AggregateColumn::unserialize(messageqcpp::ByteStream& b) { - ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); - fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); - fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); - ReturnedColumn::unserialize(b); - b >> fFunctionName; - b >> fAggOp; - //delete fFunctionParms; - fFunctionParms.reset( - dynamic_cast(ObjectReader::createTreeNode(b))); - messageqcpp::ByteStream::quadbyte size; messageqcpp::ByteStream::quadbyte i; ReturnedColumn* rc; + ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); + fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); + fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); + fAggParms.erase(fAggParms.begin(), fAggParms.end()); + ReturnedColumn::unserialize(b); + b >> fFunctionName; + b >> fAggOp; + + b >> size; + for (i = 0; i < size; i++) + { + rc = dynamic_cast(ObjectReader::createTreeNode(b)); + SRCP srcp(rc); + fAggParms.push_back(srcp); + } + b >> size; for (i = 0; i < size; i++) @@ -261,6 +243,7 @@ void AggregateColumn::unserialize(messageqcpp::ByteStream& b) bool AggregateColumn::operator==(const AggregateColumn& t) const { const ReturnedColumn* rc1, *rc2; + AggParms::const_iterator it, it2; rc1 = static_cast(this); rc2 = static_cast(&t); @@ -277,16 +260,18 @@ bool AggregateColumn::operator==(const AggregateColumn& t) const if (fAggOp != t.fAggOp) return false; - if (fFunctionParms.get() != NULL && t.fFunctionParms.get() != NULL) 
+ if (aggParms().size() != t.aggParms().size()) { - if (*fFunctionParms.get() != t.fFunctionParms.get()) + return false; + } + for (it = fAggParms.begin(), it2 = t.fAggParms.begin(); + it != fAggParms.end(); + ++it, ++it2) + { + if (**it != **it2) return false; } - else if (fFunctionParms.get() != NULL || t.fFunctionParms.get() != NULL) - return false; - //if (fAlias != t.fAlias) - // return false; if (fTableAlias != t.fTableAlias) return false; @@ -645,3 +630,4 @@ AggregateColumn::AggOp AggregateColumn::agname2num(const string& agname) } } // namespace execplan + diff --git a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h index d1db7e5a4..b0884f179 100644 --- a/dbcon/execplan/aggregatecolumn.h +++ b/dbcon/execplan/aggregatecolumn.h @@ -40,6 +40,8 @@ class ByteStream; namespace execplan { +typedef std::vector AggParms; + /** * @brief A class to represent a aggregate return column * @@ -74,7 +76,8 @@ public: BIT_OR, BIT_XOR, GROUP_CONCAT, - UDAF + UDAF, + MULTI_PARM }; /** @@ -94,21 +97,6 @@ public: */ AggregateColumn(const uint32_t sessionID); - /** - * ctor - */ - AggregateColumn(const AggOp aggop, ReturnedColumn* parm, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const AggOp aggop, const std::string& content, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID = 0); - /** * ctor */ @@ -155,24 +143,27 @@ public: fAggOp = aggOp; } + /** get function parms - * - * set the function parms from this object */ - virtual const SRCP functionParms() const + virtual AggParms& aggParms() { - return fFunctionParms; + return fAggParms; + } + + virtual const AggParms& aggParms() const + { + return fAggParms; } /** set function parms - * - * set the function parms for this object */ - virtual void functionParms(const SRCP& functionParms) + virtual void aggParms(const AggParms& parms) { - fFunctionParms = functionParms; + fAggParms = 
parms; } + /** return a copy of this pointer * * deep copy of this pointer and return the copy @@ -325,9 +316,10 @@ protected: uint8_t fAggOp; /** - * A ReturnedColumn objects that are the arguments to this function + * ReturnedColumn objects that are the arguments to this + * function */ - SRCP fFunctionParms; + AggParms fAggParms; /** table alias * A string to represent table alias name which contains this column diff --git a/dbcon/joblist/expressionstep.cpp b/dbcon/joblist/expressionstep.cpp index 0e064c359..4a8a14ff3 100644 --- a/dbcon/joblist/expressionstep.cpp +++ b/dbcon/joblist/expressionstep.cpp @@ -56,6 +56,17 @@ using namespace rowgroup; namespace joblist { +ExpressionStep::ExpressionStep() : + fExpressionFilter(NULL), + fExpressionId(-1), + fVarBinOK(false), + fSelectFilter(false), + fAssociatedJoinId(0), + fDoJoin(false), + fVirtual(false) +{ +} + ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : JobStep(jobInfo), fExpressionFilter(NULL), @@ -68,7 +79,6 @@ ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : { } - ExpressionStep::ExpressionStep(const ExpressionStep& rhs) : JobStep(rhs), fExpression(rhs.expression()), diff --git a/dbcon/joblist/expressionstep.h b/dbcon/joblist/expressionstep.h index 4a069440f..63423fc7d 100644 --- a/dbcon/joblist/expressionstep.h +++ b/dbcon/joblist/expressionstep.h @@ -50,6 +50,7 @@ class ExpressionStep : public JobStep { public: // constructors + ExpressionStep(); ExpressionStep(const JobInfo&); // destructor constructors virtual ~ExpressionStep(); diff --git a/dbcon/joblist/groupconcat.cpp b/dbcon/joblist/groupconcat.cpp index 234fc0a8e..afc91a2ec 100644 --- a/dbcon/joblist/groupconcat.cpp +++ b/dbcon/joblist/groupconcat.cpp @@ -78,7 +78,7 @@ void GroupConcatInfo::prepGroupConcat(JobInfo& jobInfo) while (i != jobInfo.groupConcatCols.end()) { GroupConcatColumn* gcc = dynamic_cast(i->get()); - const RowColumn* rcp = dynamic_cast(gcc->functionParms().get()); + const RowColumn* rcp = 
dynamic_cast(gcc->aggParms()[0].get()); SP_GroupConcat groupConcat(new GroupConcat); groupConcat->fSeparator = gcc->separator(); diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index a48ecd13a..4cf7bccc5 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -18,7 +18,6 @@ // $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $ - #include #include #include @@ -870,7 +869,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo if (gcc != NULL) { - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rcp = dynamic_cast(srcp.get()); const vector& cols = rcp->columnVec(); @@ -891,21 +890,55 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } +#if 0 + // MCOL-1201 Add support for multi-parameter UDAnF + UDAFColumn* udafc = dynamic_cast(retCols[i].get()); + if (udafc != NULL) + { + srcp = udafc->aggParms()[0]; + const RowColumn* rcp = dynamic_cast(srcp.get()); + const vector& cols = rcp->columnVec(); + for (vector::const_iterator j = cols.begin(); j != cols.end(); j++) + { + srcp = *j; + if (dynamic_cast(srcp.get()) == NULL) + retCols.push_back(srcp); + + // Do we need this? + const ArithmeticColumn* ac = dynamic_cast(srcp.get()); + const FunctionColumn* fc = dynamic_cast(srcp.get()); + if (ac != NULL || fc != NULL) + { + // bug 3728, make a dummy expression step for each expression. + scoped_ptr es(new ExpressionStep(jobInfo)); + es->expression(srcp, jobInfo); + } + } + continue; + } +#endif srcp = retCols[i]; const AggregateColumn* ag = dynamic_cast(retCols[i].get()); - - if (ag != NULL) - srcp = ag->functionParms(); - - const ArithmeticColumn* ac = dynamic_cast(srcp.get()); - const FunctionColumn* fc = dynamic_cast(srcp.get()); - - if (ac != NULL || fc != NULL) + // bug 3728 Make a dummy expression for srcp if it is an + // expression. This is needed to fill in some stuff. 
+ // Note that es.expression does nothing if the item is not an expression. + if (ag == NULL) { - // bug 3728, make a dummy expression step for each expression. - scoped_ptr es(new ExpressionStep(jobInfo)); - es->expression(srcp, jobInfo); + // Not an aggregate. Make a dummy expression for the item + ExpressionStep es; + es.expression(srcp, jobInfo); + } + else + { + // MCOL-1201 multi-argument aggregate. make a dummy expression + // step for each argument that is an expression. + for (uint32_t i = 0; i < ag->aggParms().size(); ++i) + { + srcp = ag->aggParms()[i]; + ExpressionStep es; + es.expression(srcp, jobInfo); + } } } @@ -915,17 +948,18 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo { srcp = retCols[i]; const SimpleColumn* sc = dynamic_cast(srcp.get()); + AggregateColumn* aggc = dynamic_cast(srcp.get()); bool doDistinct = (csep->distinct() && csep->groupByCols().empty()); uint32_t tupleKey = -1; string alias; string view; - // returned column could be groupby column, a simplecoulumn not a agregatecolumn + // returned column could be groupby column, a simplecoulumn not an aggregatecolumn int op = 0; CalpontSystemCatalog::OID dictOid = 0; CalpontSystemCatalog::ColType ct, aggCt; - if (sc == NULL) + if (aggc) { GroupConcatColumn* gcc = dynamic_cast(retCols[i].get()); @@ -939,7 +973,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo tupleKey = ti.key; jobInfo.returnedColVec.push_back(make_pair(tupleKey, gcc->aggOp())); // not a tokenOnly column. 
Mark all the columns involved - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rowCol = dynamic_cast(srcp.get()); if (rowCol) @@ -963,186 +997,353 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } - - AggregateColumn* ac = dynamic_cast(retCols[i].get()); - - if (ac != NULL) + else { - srcp = ac->functionParms(); - sc = dynamic_cast(srcp.get()); + // Aggregate column not group concat + AggParms& aggParms = aggc->aggParms(); - if (ac->constCol().get() != NULL) + for (uint32_t parm = 0; parm < aggParms.size(); ++parm) { - // replace the aggregate on constant with a count(*) - SRCP clone; - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) + if (aggc->constCol().get() != NULL) { - clone.reset(new UDAFColumn(*udafc, ac->sessionID())); + // replace the aggregate on constant with a count(*) + SRCP clone; + UDAFColumn* udafc = dynamic_cast(aggc); + + if (udafc) + { + clone.reset(new UDAFColumn(*udafc, aggc->sessionID())); + } + else + { + clone.reset(new AggregateColumn(*aggc, aggc->sessionID())); + } + + jobInfo.constAggregate.insert(make_pair(i, clone)); + aggc->aggOp(AggregateColumn::COUNT_ASTERISK); + aggc->distinct(false); + } + + srcp = aggParms[parm]; + sc = dynamic_cast(srcp.get()); + if (parm == 0) + { + op = aggc->aggOp(); } else { - clone.reset(new AggregateColumn(*ac, ac->sessionID())); + op = AggregateColumn::MULTI_PARM; + } + doDistinct = aggc->distinct(); + if (aggParms.size() == 1) + { + // Set the col type based on the single parm. + // Changing col type based on a parm if multiple parms + // doesn't really make sense. + updateAggregateColType(aggc, srcp, op, jobInfo); + } + aggCt = aggc->resultType(); + + // As of bug3695, make sure varbinary is not used in aggregation. 
+ // TODO: allow for UDAF + if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) + throw runtime_error ("VARBINARY in aggregate function is not supported."); + + // Project the parm columns or expressions + if (sc != NULL) + { + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. + if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. 
+ else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } + } + else + { + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - jobInfo.constAggregate.insert(make_pair(i, clone)); - ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ac->distinct(false); - } + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - op = ac->aggOp(); - doDistinct = ac->distinct(); - updateAggregateColType(ac, srcp, op, jobInfo); - aggCt = ac->resultType(); + if (keyIt == projectKeys.end()) + { + 
RetColsVector::iterator it = pcv.end(); - // As of bug3695, make sure varbinary is not used in aggregation. - if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) - throw runtime_error ("VARBINARY in aggregate function is not supported."); - } - } + if (doDistinct) + it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // simple column selected or aggregated - if (sc != NULL) - { - // one column only need project once - CalpontSystemCatalog::OID retOid = sc->oid(); - CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); - alias = extractTableAlias(sc); - view = sc->viewName(); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); - if (!sc->schemaName().empty()) - { - ct = sc->colType(); + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } -//XXX use this before connector sets colType in sc correctly. 
- if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) - ct = jobInfo.csc->colType(sc->oid()); + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; -//X - dictOid = isDictCol(ct); - } - else - { - retOid = (tblOid + 1) + sc->colPosition(); - ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; - } + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); - TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); - tupleKey = ti.key; + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; - // this is a string column - if (dictOid > 0) - { - map::iterator findit = jobInfo.tokenOnly.find(tupleKey); - - // if the column has never seen, and the op is count: possible need count only. - if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) - { - if (findit == jobInfo.tokenOnly.end()) - jobInfo.tokenOnly[tupleKey] = true; - } - // if aggregate other than count, token is not enough. 
- else if (op != 0 || doDistinct) - { - jobInfo.tokenOnly[tupleKey] = false; - } - - findit = jobInfo.tokenOnly.find(tupleKey); - - if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) - { - dictMap[tupleKey] = dictOid; - jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; - ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); - jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } } else { - const ArithmeticColumn* ac = NULL; - const FunctionColumn* fc = NULL; - const WindowFunctionColumn* wc = NULL; - bool hasAggCols = false; - - if ((ac = dynamic_cast(srcp.get())) != NULL) + // Not an Aggregate + // simple column selected + if (sc != NULL) { - if (ac->aggColumnList().size() > 0) - hasAggCols = true; + // one column only need project once + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. 
+ if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. + else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } } - else if ((fc = dynamic_cast(srcp.get())) != NULL) - { - if (fc->aggColumnList().size() > 0) - hasAggCols = true; - } - else if (dynamic_cast(srcp.get()) != NULL) - { - std::ostringstream errmsg; - errmsg << "Invalid aggregate function nesting."; - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - else if ((wc = dynamic_cast(srcp.get())) == NULL) - { - std::ostringstream errmsg; - errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - - uint64_t eid = srcp.get()->expressionId(); - ct = srcp.get()->resultType(); - TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); - tupleKey = ti.key; - - if (hasAggCols) - jobInfo.expressionVec.push_back(tupleKey); - } - - // add to project list - vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - - if (keyIt == projectKeys.end()) - { - RetColsVector::iterator it = pcv.end(); - - if (doDistinct) - it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); else - it = pcv.insert(pcv.end(), srcp); - - projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); - } - else if (doDistinct) // @bug4250, move forward distinct column if necessary. 
- { - uint32_t pos = distance(projectKeys.begin(), keyIt); - - if (pos >= lastGroupByPos) { - pcv[pos] = pcv[lastGroupByPos]; - pcv[lastGroupByPos] = srcp; - projectKeys[pos] = projectKeys[lastGroupByPos]; - projectKeys[lastGroupByPos] = tupleKey; - lastGroupByPos++; + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - } - if (doDistinct && dictOid > 0) - tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - // remember the columns to be returned - jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + if (keyIt == projectKeys.end()) + { + RetColsVector::iterator it = pcv.end(); - if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) - jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + if (doDistinct) + it = pcv.insert(pcv.begin() + 
lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // bug 1499 distinct processing, save unique distinct columns - if (doDistinct && - (jobInfo.distinctColVec.end() == - find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) - { - jobInfo.distinctColVec.push_back(tupleKey); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); + + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } + + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 3dbd01311..21c7c0af6 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -164,6 +164,9 @@ inline RowAggFunctionType functionIdMap(int planFuncId) case AggregateColumn::UDAF: return ROWAGG_UDAF; + case AggregateColumn::MULTI_PARM: + return ROWAGG_MULTI_PARM; + default: return ROWAGG_FUNCT_UNDEFINE; } @@ -1302,7 +1305,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no/not enough 
UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -1468,7 +1471,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); @@ -1483,6 +1486,17 @@ void TupleAggregateStep::prep1PhaseAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(key); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(width[colProj]); + } + break; + default: { ostringstream emsg; @@ -1560,7 +1574,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(3)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec[i]->fAuxColumnIndex = lastCol++; @@ -1675,7 +1689,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation map projColPosMap; @@ -1848,7 +1862,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); 
} } else @@ -2043,7 +2057,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -2065,6 +2079,18 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(aggKey); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(widthProj[colProj]); + ++colAgg; + } + break; + default: { ostringstream emsg; @@ -2111,7 +2137,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( groupByNoDist.push_back(groupby); aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - + + projColsUDAFIndex = 0; // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2121,6 +2148,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colAgg = -1; + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -2432,11 +2467,37 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + // update the aggregate function vector + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = 
jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + } if (aggOp == ROWAGG_COUNT_NO_OP) funct->fAuxColumnIndex = colAgg; @@ -2549,7 +2610,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep1PhaseDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec2[i]->fAuxColumnIndex = lastCol++; @@ -2893,7 +2954,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3036,12 +3097,11 @@ void TupleAggregateStep::prep2PhasesAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); break; } - } if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -3240,7 +3300,7 @@ void 
TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } oidsAggPm.push_back(oidsProj[colProj]); @@ -3261,6 +3321,18 @@ void TupleAggregateStep::prep2PhasesAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -3278,11 +3350,16 @@ void TupleAggregateStep::prep2PhasesAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap; AGG_MAP aggDupFuncMap; + projColsUDAFIndex = 0; // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3299,7 +3376,14 @@ void TupleAggregateStep::prep2PhasesAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colPm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } // Is this a UDAF? use the function as part of the key. 
+ mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; if (aggOp == ROWAGG_UDAF) @@ -3452,20 +3536,36 @@ void TupleAggregateStep::prep2PhasesAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3517,7 +3617,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3545,7 +3645,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -3691,6 +3791,7 
@@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector groupByPm, groupByUm, groupByNoDist; vector functionVecPm, functionNoDistVec, functionVecUm; + list multiParmIndexes; uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; @@ -3702,7 +3803,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3856,7 +3957,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -4050,7 +4151,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -4072,6 +4173,19 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + multiParmIndexes.push_back(colAggPm); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -4093,12 +4207,23 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( groupByUm.push_back(groupby); } + // Keep a count of the parms 
after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) + { SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; // UDAF support + if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) + { + // Multi-Parm is not used on the UM + ++multiParms; + continue; + } if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); @@ -4106,7 +4231,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } else @@ -4116,18 +4241,25 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fStatsFunction, funcPm->fOutputColumnIndex, funcPm->fOutputColumnIndex, - funcPm->fAuxColumnIndex)); + funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } } - posAggUm = posAggPm; - oidsAggUm = oidsAggPm; - keysAggUm = keysAggPm; - scaleAggUm = scaleAggPm; - precisionAggUm = precisionAggPm; - widthAggUm = widthAggPm; - typeAggUm = typeAggPm; + // Copy over the PM arrays to the UM. Skip any that are a multi-parm entry. 
+ for (uint32_t idx = 0; idx < oidsAggPm.size(); ++idx) + { + if (find (multiParmIndexes.begin(), multiParmIndexes.end(), idx ) != multiParmIndexes.end()) + { + continue; + } + oidsAggUm.push_back(oidsAggPm[idx]); + keysAggUm.push_back(keysAggPm[idx]); + scaleAggUm.push_back(scaleAggPm[idx]); + precisionAggUm.push_back(precisionAggPm[idx]); + widthAggUm.push_back(widthAggPm[idx]); + typeAggUm.push_back(typeAggPm[idx]); + } } @@ -4137,6 +4269,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4159,6 +4295,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colUm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } + + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -4285,7 +4436,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second; + colUm = it->second - multiParms; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); @@ -4309,7 +4460,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - 
colUm = it->second; + colUm = it->second - multiParms; if (aggOp == ROWAGG_SUM) { @@ -4412,21 +4563,36 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - pUDAFFunc = udafc->getContext().getFunction(); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4480,7 +4646,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4540,7 +4706,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called 
but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -4687,6 +4853,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -4694,6 +4865,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } if (returnedColVec[k].first != distinctColKey) continue; @@ -4715,7 +4891,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -4732,9 +4908,15 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } // search non-distinct functions in functionVec vector::iterator it = functionVecUm.begin(); @@ -4752,7 +4934,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK || @@ -4773,7 +4955,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( 
f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 02fa4d8a4..86dc0bd2f 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4035,6 +4035,10 @@ ParseTree* buildParseTree(Item_func* item, gp_walk_info& gwi, bool& nonSupport) ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { + // MCOL-1201 For UDAnF multiple parameters + vector selCols; + vector orderCols; + if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -4051,6 +4055,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // N.B. argument_count() is the # of formal parms to the agg fcn. InifniDB only supports 1 argument // TODO: Support more than one parm +#if 0 if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC && isp->sum_func() != Item_sum::UDF_SUM_FUNC) { @@ -4058,7 +4063,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG); return NULL; } - +#endif AggregateColumn* ac = NULL; if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) @@ -4081,444 +4086,509 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { gwi.fatalParseError = true; gwi.parseErrorText = "Non supported aggregate type on the select clause"; + if (ac) + delete ac; return NULL; } - // special parsing for group_concat - if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + try { - Item_func_group_concat* gc = (Item_func_group_concat*)isp; + + // special parsing for group_concat + if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + Item_func_group_concat* gc = (Item_func_group_concat*)isp; vector orderCols; - RowColumn* rowCol = new RowColumn(); + 
RowColumn* rowCol = new RowColumn(); vector selCols; - uint32_t select_ctn = gc->count_field(); - ReturnedColumn* rc = NULL; + uint32_t select_ctn = gc->count_field(); + ReturnedColumn* rc = NULL; - for (uint32_t i = 0; i < select_ctn; i++) - { - rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); - - if (!rc || gwi.fatalParseError) - return NULL; - - selCols.push_back(SRCP(rc)); - } - - ORDER** order_item, **end; - - for (order_item = gc->get_order(), - end = order_item + gc->order_field(); order_item < end; - order_item++) - { - Item* ord_col = *(*order_item)->item; - - if (ord_col->type() == Item::INT_ITEM) + for (uint32_t i = 0; i < select_ctn; i++) { - Item_int* id = (Item_int*)ord_col; - - if (id->val_int() > (int)selCols.size()) - { - gwi.fatalParseError = true; - return NULL; - } - - rc = selCols[id->val_int() - 1]->clone(); - rc->orderPos(id->val_int() - 1); - } - else - { - rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); + rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); if (!rc || gwi.fatalParseError) { + if (ac) + delete ac; return NULL; } + + selCols.push_back(SRCP(rc)); } - // 10.2 TODO: direction is now a tri-state flag - rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? 
true : false); - orderCols.push_back(SRCP(rc)); - } + ORDER** order_item, **end; - rowCol->columnVec(selCols); - (dynamic_cast(ac))->orderCols(orderCols); - parm.reset(rowCol); - - if (gc->str_separator()) - { - string separator; - separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); - (dynamic_cast(ac))->separator(separator); - } - } - else - { - for (uint32_t i = 0; i < isp->argument_count(); i++) - { - Item* sfitemp = sfitempp[i]; - Item::Type sfitype = sfitemp->type(); - - switch (sfitype) + for (order_item = gc->get_order(), + end = order_item + gc->order_field(); order_item < end; + order_item++) { - case Item::FIELD_ITEM: - { - Item_field* ifp = reinterpret_cast(sfitemp); - SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + Item* ord_col = *(*order_item)->item; - if (!sc) + if (ord_col->type() == Item::INT_ITEM) + { + Item_int* id = (Item_int*)ord_col; + + if (id->val_int() > (int)selCols.size()) { gwi.fatalParseError = true; - break; + if (ac) + delete ac; + return NULL; } - parm.reset(sc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); - TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); - gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); - break; + rc = selCols[id->val_int() - 1]->clone(); + rc->orderPos(id->val_int() - 1); } - - case Item::INT_ITEM: - case Item::STRING_ITEM: - case Item::REAL_ITEM: - case Item::DECIMAL_ITEM: + else { - // treat as count(*) - if (ac->aggOp() == AggregateColumn::COUNT) - ac->aggOp(AggregateColumn::COUNT_ASTERISK); + rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::NULL_ITEM: - { - //ac->aggOp(AggregateColumn::COUNT); - parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); - //ac->functionParms(parm); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::FUNC_ITEM: - { - Item_func* ifp = (Item_func*)sfitemp; - ReturnedColumn* rc = 0; - - // check count(1+1) case - vector tmpVec; - uint16_t parseInfo = 0; - parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); - - if (parseInfo & SUB_BIT) + if (!rc || gwi.fatalParseError) { - gwi.fatalParseError = true; - break; - } - else if (!gwi.fatalParseError && - !(parseInfo & AGG_BIT) && - !(parseInfo & AF_BIT) && - tmpVec.size() == 0) - { - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - FunctionColumn* fc = dynamic_cast(rc); - - if ((fc && fc->functionParms().empty()) || !fc) - { - //ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (dynamic_cast(rc)) - { - //@bug5229. handle constant function on aggregate argument - ac->constCol(SRCP(rc)); - break; - } - } - } - - // MySQL carelessly allows correlated aggregate function on the WHERE clause. - // Here is the work around to deal with that inconsistence. 
- // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; - ClauseType clauseType = gwi.clauseType; - - if (gwi.clauseType == WHERE) - gwi.clauseType = HAVING; - - // @bug 3603. for cases like max(rand()). try to build function first. - if (!rc) - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - - parm.reset(rc); - gwi.clauseType = clauseType; - - if (gwi.fatalParseError) - break; - - //ac->functionParms(parm); - break; - } - - case Item::REF_ITEM: - { - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (rc) - { - parm.reset(rc); - //ac->functionParms(parm); - break; + if (ac) + delete ac; + return NULL; } } - default: - { - gwi.fatalParseError = true; - //gwi.parseErrorText = "Non-supported Item in Aggregate function"; - } + // 10.2 TODO: direction is now a tri-state flag + rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); + orderCols.push_back(SRCP(rc)); } - if (gwi.fatalParseError) + rowCol->columnVec(selCols); + (dynamic_cast(ac))->orderCols(orderCols); + parm.reset(rowCol); + + if (gc->str_separator()) { - if (gwi.parseErrorText.empty()) - { - Message::Args args; - - if (item->name) - args.add(item->name); - else - args.add(""); - - gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); - } - - return NULL; + string separator; + separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); + (dynamic_cast(ac))->separator(separator); } } - } - - if (parm) - { - ac->functionParms(parm); - - if (isp->sum_func() == Item_sum::AVG_FUNC || - isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: 
- case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::DECIMAL; - ct.colWidth = 8; - ct.scale += 4; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; -#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::COUNT_FUNC || - isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = parm->resultType().scale; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_FUNC || - isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - ct.colDataType = CalpontSystemCatalog::BIGINT; - - // no break, let fall through - - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - ct.colWidth = 8; - break; - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::UBIGINT; - ct.colWidth = 8; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; 
-#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::STD_FUNC || - isp->sum_func() == Item_sum::VARIANCE_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - ct.scale = 0; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = 0; - ct.precision = -16; // borrowed to indicate skip null value check on connector - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - //Item_func_group_concat* gc = (Item_func_group_concat*)isp; - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::VARCHAR; - ct.colWidth = isp->max_length; - ct.precision = 0; - ac->resultType(ct); - } else { - ac->resultType(parm->resultType()); + for (uint32_t i = 0; i < isp->argument_count(); i++) + { + Item* sfitemp = sfitempp[i]; + Item::Type sfitype = sfitemp->type(); + + switch (sfitype) + { + case Item::FIELD_ITEM: + { + Item_field* ifp = reinterpret_cast(sfitemp); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + + if (!sc) + { + gwi.fatalParseError = true; + break; + } + + parm.reset(sc); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); + TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); + gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); + break; + } + + case Item::INT_ITEM: + case Item::STRING_ITEM: + case Item::REAL_ITEM: + case Item::DECIMAL_ITEM: + { + // treat as count(*) + if (ac->aggOp() == AggregateColumn::COUNT) + ac->aggOp(AggregateColumn::COUNT_ASTERISK); + + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::NULL_ITEM: + { + parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::FUNC_ITEM: + { + Item_func* ifp = (Item_func*)sfitemp; + ReturnedColumn* rc = 0; + + // check count(1+1) case + vector tmpVec; + uint16_t parseInfo = 0; + parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); + + if (parseInfo & SUB_BIT) + { + gwi.fatalParseError = true; + break; + } + else if (!gwi.fatalParseError && + !(parseInfo & AGG_BIT) && + !(parseInfo & AF_BIT) && + tmpVec.size() == 0) + { + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + FunctionColumn* fc = dynamic_cast(rc); + + if ((fc && fc->functionParms().empty()) || !fc) + { + //ac->aggOp(AggregateColumn::COUNT_ASTERISK); + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (dynamic_cast(rc)) + { + //@bug5229. handle constant function on aggregate argument + ac->constCol(SRCP(rc)); + break; + } + } + } + + // MySQL carelessly allows correlated aggregate function on the WHERE clause. + // Here is the work around to deal with that inconsistence. + // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; + ClauseType clauseType = gwi.clauseType; + + if (gwi.clauseType == WHERE) + gwi.clauseType = HAVING; + + // @bug 3603. for cases like max(rand()). try to build function first. 
+ if (!rc) + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + + parm.reset(rc); + gwi.clauseType = clauseType; + + if (gwi.fatalParseError) + break; + + break; + } + + case Item::REF_ITEM: + { + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (rc) + { + parm.reset(rc); + break; + } + } + + default: + { + gwi.fatalParseError = true; + //gwi.parseErrorText = "Non-supported Item in Aggregate function"; + } + } + + if (gwi.fatalParseError) + { + if (gwi.parseErrorText.empty()) + { + Message::Args args; + + if (item->name) + args.add(item->name); + else + args.add(""); + + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); + } + + if (ac) + delete ac; + return NULL; + } + if (parm) + { + // MCOL-1201 multi-argument aggregate + ac->aggParms().push_back(parm); + } + } } - } - else - { - ac->resultType(colType_MysqlToIDB(isp)); - } - // adjust decimal result type according to internalDecimalScale - if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) - { - CalpontSystemCatalog::ColType ct = ac->resultType(); - ct.scale = gwi.internalDecimalScale; - ac->resultType(ct); - } - - // check for same aggregate on the select list - ac->expressionId(ci->expressionId++); - - if (gwi.clauseType != SELECT) - { - for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + // Get result type + // Modified for MCOL-1201 multi-argument aggregate + if (ac->aggParms().size() > 0) { - if (*ac == gwi.returnedCols[i].get()) - ac->expressionId(gwi.returnedCols[i]->expressionId()); - } - } + // These are all one parm functions, so we can safely + // use the first parm for result type. + parm = ac->aggParms()[0]; + if (isp->sum_func() == Item_sum::AVG_FUNC || + isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); - // @bug5977 @note Temporary fix to avoid mysqld crash. 
The permanent fix will - // be applied in ExeMgr. When the ExeMgr fix is available, this checking - // will be taken out. - if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::DECIMAL; + ct.colWidth = 8; + ct.scale += 4; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::COUNT_FUNC || + isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = parm->resultType().scale; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_FUNC || + isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + ct.colDataType = CalpontSystemCatalog::BIGINT; + + // no break, let fall through + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + ct.colWidth = 8; + break; + + case CalpontSystemCatalog::UTINYINT: + case 
CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::UBIGINT; + ct.colWidth = 8; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::STD_FUNC || + isp->sum_func() == Item_sum::VARIANCE_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + ct.scale = 0; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = 0; + ct.precision = -16; // borrowed to indicate skip null value check on connector + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + //Item_func_group_concat* gc = (Item_func_group_concat*)isp; + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::VARCHAR; + ct.colWidth = isp->max_length; + ct.precision = 0; + ac->resultType(ct); + } + else + { + // UDAF result type will be set below. 
+ ac->resultType(parm->resultType()); + } + } + else + { + ac->resultType(colType_MysqlToIDB(isp)); + } + + // adjust decimal result type according to internalDecimalScale + if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) + { + CalpontSystemCatalog::ColType ct = ac->resultType(); + ct.scale = gwi.internalDecimalScale; + ac->resultType(ct); + } + + // check for same aggregate on the select list + ac->expressionId(ci->expressionId++); + + if (gwi.clauseType != SELECT) + { + for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + { + if (*ac == gwi.returnedCols[i].get()) + ac->expressionId(gwi.returnedCols[i]->expressionId()); + } + } + + // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will + // be applied in ExeMgr. When the ExeMgr fix is available, this checking + // will be taken out. + if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "No project column found for aggregate function"; + if (ac) + delete ac; + return NULL; + } + else if (ac->constCol()) + { + gwi.count_asterisk_list.push_back(ac); + } + + // For UDAF, populate the context and call the UDAF init() function. + // The return type is (should be) set in context by init(). + if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + { + UDAFColumn* udafc = dynamic_cast(ac); + + if (udafc) + { + mcsv1Context& context = udafc->getContext(); + context.setName(isp->func_name()); + + // Set up the return type defaults for the call to init() + context.setResultType(udafc->resultType().colDataType); + context.setColWidth(udafc->resultType().colWidth); + context.setScale(udafc->resultType().scale); + context.setPrecision(udafc->resultType().precision); + + context.setParamCount(udafc->aggParms().size()); + ColumnDatum colType; + ColumnDatum colTypes[udafc->aggParms().size()]; + // Build the column type vector. 
+ // Modified for MCOL-1201 multi-argument aggregate + for (uint32_t i = 0; i < udafc->aggParms().size(); ++i) + { + const execplan::CalpontSystemCatalog::ColType& resultType + = udafc->aggParms()[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; + } + + // Call the user supplied init() + mcsv1sdk::mcsv1_UDAF* udaf = context.getFunction(); + if (!udaf) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "Aggregate Function " + context.getName() + " doesn't exist in the ColumnStore engine"; + if (ac) + delete ac; + return NULL; + } + if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR) + { + gwi.fatalParseError = true; + gwi.parseErrorText = udafc->getContext().getErrorMessage(); + if (ac) + delete ac; + return NULL; + } + + // UDAF_OVER_REQUIRED means that this function is for Window + // Function only. Reject it here in aggregate land. + if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, + context.getName()); + if (ac) + delete ac; + return NULL; + } + + // Set the return type as set in init() + CalpontSystemCatalog::ColType ct; + ct.colDataType = context.getResultType(); + ct.colWidth = context.getColWidth(); + ct.scale = context.getScale(); + ct.precision = context.getPrecision(); + udafc->resultType(ct); + } + } + + } + catch (std::logic_error e) { gwi.fatalParseError = true; - gwi.parseErrorText = "No project column found for aggregate function"; + gwi.parseErrorText = "error building Aggregate Function: "; + gwi.parseErrorText += e.what(); + if (ac) + delete ac; return NULL; } - else if (ac->constCol()) + catch (...) 
{ - gwi.count_asterisk_list.push_back(ac); + gwi.fatalParseError = true; + gwi.parseErrorText = "error building Aggregate Function: Unspecified exception"; + if (ac) + delete ac; + return NULL; } - - // For UDAF, populate the context and call the UDAF init() function. - if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) - { - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) - { - mcsv1Context& context = udafc->getContext(); - context.setName(isp->func_name()); - - // Set up the return type defaults for the call to init() - context.setResultType(udafc->resultType().colDataType); - context.setColWidth(udafc->resultType().colWidth); - context.setScale(udafc->resultType().scale); - context.setPrecision(udafc->resultType().precision); - - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - - // Build the column type vector. For now, there is only one - colTypes.push_back(make_pair(udafc->functionParms()->alias(), udafc->functionParms()->resultType().colDataType)); - - // Call the user supplied init() - if (context.getFunction()->init(&context, colTypes) == mcsv1_UDAF::ERROR) - { - gwi.fatalParseError = true; - gwi.parseErrorText = udafc->getContext().getErrorMessage(); - return NULL; - } - - if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) - { - gwi.fatalParseError = true; - gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, - context.getName()); - return NULL; - } - - // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; - ct.colDataType = context.getResultType(); - ct.colWidth = context.getColWidth(); - ct.scale = context.getScale(); - ct.precision = context.getPrecision(); - udafc->resultType(ct); - } - } - return ac; } @@ -7834,7 +7904,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } 
std::vector::iterator funciter; @@ -9898,7 +9968,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } std::vector::iterator funciter; diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 12fa74fa5..5ec4307a9 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -779,8 +779,11 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h //double double_val = *(double*)(&value); //f2->store(double_val); - if (f2->decimals() < (uint32_t)row.getScale(s)) - f2->dec = (uint32_t)row.getScale(s); + if ((f2->decimals() == DECIMAL_NOT_SPECIFIED && row.getScale(s) > 0) + || f2->decimals() < row.getScale(s)) + { + f2->dec = row.getScale(s); + } f2->store(dl); @@ -5273,7 +5276,6 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE execplan::CalpontSelectExecutionPlan::ColumnMap::iterator colMapIter; execplan::CalpontSelectExecutionPlan::ColumnMap::iterator condColMapIter; execplan::ParseTree* ptIt; - execplan::ReturnedColumn* rcIt; for(TABLE_LIST* tl = gi.groupByTables; tl; tl=tl->next_local) { mapiter = ci->tableMap.find(tl->table); diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 1635c815a..cf6abb6d6 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -340,6 +340,7 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n ac->distinct(item_sum->has_with_distinct()); Window_spec* win_spec = wf->window_spec; SRCP srcp; + CalpontSystemCatalog::ColType ct; // For return type // arguments vector funcParms; @@ -370,18 +371,25 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n context.setColWidth(rt.colWidth); context.setScale(rt.scale); context.setPrecision(rt.precision); + 
context.setParamCount(funcParms.size()); + + mcsv1sdk::ColumnDatum colType; + mcsv1sdk::ColumnDatum colTypes[funcParms.size()]; // Turn on the Analytic flag so the function is aware it is being called // as a Window Function. context.setContextFlag(CONTEXT_IS_ANALYTIC); - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - // Build the column type vector. + // Modified for MCOL-1201 multi-argument aggregate for (size_t i = 0; i < funcParms.size(); ++i) { - colTypes.push_back(make_pair(funcParms[i]->alias(), funcParms[i]->resultType().colDataType)); + const execplan::CalpontSystemCatalog::ColType& resultType + = funcParms[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; } // Call the user supplied init() @@ -401,7 +409,6 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n } // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; ct.colDataType = context.getResultType(); ct.colWidth = context.getColWidth(); ct.scale = context.getScale(); @@ -419,10 +426,10 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n { case Item_sum::UDF_SUM_FUNC: { - uint64_t bIgnoreNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)); - char sIgnoreNulls[18]; - sprintf(sIgnoreNulls, "%lu", bIgnoreNulls); - srcp.reset(new ConstantColumn(sIgnoreNulls, (uint64_t)bIgnoreNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 1 => RESPECT + uint64_t bRespectNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) ? 0 : 1; + char sRespectNulls[18]; + sprintf(sRespectNulls, "%lu", bRespectNulls); + srcp.reset(new ConstantColumn(sRespectNulls, (uint64_t)bRespectNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 
1 => RESPECT funcParms.push_back(srcp); break; } @@ -880,11 +887,13 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n return NULL; } - ac->resultType(colType_MysqlToIDB(item_sum)); - - // bug5736. Make the result type double for some window functions when - // infinidb_double_for_decimal_math is set. - ac->adjustResultType(); + if (item_sum->sum_func() != Item_sum::UDF_SUM_FUNC) + { + ac->resultType(colType_MysqlToIDB(item_sum)); + // bug5736. Make the result type double for some window functions when + // infinidb_double_for_decimal_math is set. + ac->adjustResultType(); + } ac->expressionId(ci->expressionId++); diff --git a/utils/common/any.hpp b/utils/common/any.hpp index be0ca679b..5408c5c87 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -9,123 +9,142 @@ * http://www.boost.org/LICENSE_1_0.txt */ +#include #include namespace static_any { namespace anyimpl { + struct bad_any_cast + { + }; - struct bad_any_cast - { - }; + struct empty_any + { + }; - struct empty_any - { - }; + struct base_any_policy + { + virtual void static_delete(void** x) = 0; + virtual void copy_from_value(void const* src, void** dest) = 0; + virtual void clone(void* const* src, void** dest) = 0; + virtual void move(void* const* src, void** dest) = 0; + virtual void* get_value(void** src) = 0; + virtual size_t get_size() = 0; + }; - struct base_any_policy - { - virtual void static_delete(void** x) = 0; - virtual void copy_from_value(void const* src, void** dest) = 0; - virtual void clone(void* const* src, void** dest) = 0; - virtual void move(void* const* src, void** dest) = 0; - virtual void* get_value(void** src) = 0; - virtual size_t get_size() = 0; - }; + template + struct typed_base_any_policy : base_any_policy + { + virtual size_t get_size() + { + return sizeof(T); + } + }; - template - struct typed_base_any_policy : base_any_policy - { - virtual size_t get_size() { return sizeof(T); } - }; + template + struct small_any_policy : 
typed_base_any_policy + { + virtual void static_delete(void** x) + { + } + virtual void copy_from_value(void const* src, void** dest) + { + new(dest) T(*reinterpret_cast(src)); + } + virtual void clone(void* const* src, void** dest) + { + *dest = *src; + } + virtual void move(void* const* src, void** dest) + { + *dest = *src; + } + virtual void* get_value(void** src) + { + return reinterpret_cast(src); + } + }; - template - struct small_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) { } - virtual void copy_from_value(void const* src, void** dest) - { new(dest) T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) { *dest = *src; } - virtual void move(void* const* src, void** dest) { *dest = *src; } - virtual void* get_value(void** src) { return reinterpret_cast(src); } - }; - - template - struct big_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) + template + struct big_any_policy : typed_base_any_policy + { + virtual void static_delete(void** x) { if (*x) - delete(*reinterpret_cast(x)); + delete(*reinterpret_cast(x)); *x = NULL; } - virtual void copy_from_value(void const* src, void** dest) + virtual void copy_from_value(void const* src, void** dest) { - *dest = new T(*reinterpret_cast(src)); + *dest = new T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) + virtual void clone(void* const* src, void** dest) { - *dest = new T(**reinterpret_cast(src)); + *dest = new T(**reinterpret_cast(src)); } - virtual void move(void* const* src, void** dest) + virtual void move(void* const* src, void** dest) { - (*reinterpret_cast(dest))->~T(); - **reinterpret_cast(dest) = **reinterpret_cast(src); + (*reinterpret_cast(dest))->~T(); + **reinterpret_cast(dest) = **reinterpret_cast(src); } - virtual void* get_value(void** src) { return *src; } - }; + virtual void* get_value(void** src) + { + return *src; + } + }; - template - struct choose_policy - { - typedef 
big_any_policy type; - }; + template + struct choose_policy + { + typedef big_any_policy type; + }; - template - struct choose_policy - { - typedef small_any_policy type; - }; + template + struct choose_policy + { + typedef small_any_policy type; + }; - struct any; + struct any; - /// Choosing the policy for an any type is illegal, but should never happen. - /// This is designed to throw a compiler error. - template<> - struct choose_policy - { - typedef void type; - }; + /// Choosing the policy for an any type is illegal, but should never happen. + /// This is designed to throw a compiler error. + template<> + struct choose_policy + { + typedef void type; + }; - /// Specializations for small types. - #define SMALL_POLICY(TYPE) template<> struct \ - choose_policy { typedef small_any_policy type; }; + /// Specializations for small types. +#define SMALL_POLICY(TYPE) template<> struct \ + choose_policy { typedef small_any_policy type; }; - SMALL_POLICY(char); - SMALL_POLICY(signed char); - SMALL_POLICY(unsigned char); - SMALL_POLICY(signed short); - SMALL_POLICY(unsigned short); - SMALL_POLICY(signed int); - SMALL_POLICY(unsigned int); - SMALL_POLICY(signed long); - SMALL_POLICY(unsigned long); - SMALL_POLICY(signed long long); - SMALL_POLICY(unsigned long long); - SMALL_POLICY(float); - SMALL_POLICY(double); - SMALL_POLICY(bool); + SMALL_POLICY(char); + SMALL_POLICY(signed char); + SMALL_POLICY(unsigned char); + SMALL_POLICY(signed short); + SMALL_POLICY(unsigned short); + SMALL_POLICY(signed int); + SMALL_POLICY(unsigned int); + SMALL_POLICY(signed long); + SMALL_POLICY(unsigned long); + SMALL_POLICY(signed long long); + SMALL_POLICY(unsigned long long); + SMALL_POLICY(float); + SMALL_POLICY(double); + SMALL_POLICY(bool); - #undef SMALL_POLICY +#undef SMALL_POLICY - /// This function will return a different policy for each type. 
- template - base_any_policy* get_policy() - { - static typename choose_policy::type policy; - return &policy; - }; + /// This function will return a different policy for each type. + template + base_any_policy* get_policy() + { + static typename choose_policy::type policy; + return &policy; + }; } class any @@ -139,37 +158,40 @@ public: /// Initializing constructor. template any(const T& x) - : policy(anyimpl::get_policy()), object(NULL) + : policy(anyimpl::get_policy()), object(NULL) { assign(x); } /// Empty constructor. any() - : policy(anyimpl::get_policy()), object(NULL) - { } + : policy(anyimpl::get_policy()), object(NULL) + { + } /// Special initializing constructor for string literals. any(const char* x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Copy constructor. any(const any& x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Destructor. - ~any() { + ~any() + { policy->static_delete(&object); } /// Assignment function from another any. - any& assign(const any& x) { + any& assign(const any& x) + { reset(); policy = x.policy; policy->clone(&x.object, &object); @@ -178,7 +200,8 @@ public: /// Assignment function. template - any& assign(const T& x) { + any& assign(const T& x) + { reset(); policy = anyimpl::get_policy(); policy->copy_from_value(&x, &object); @@ -197,8 +220,42 @@ public: return assign(x); } + /// Less than operator for sorting + bool operator<(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) < 0 ? 
1 : 0; + } + return 0; + } + + /// equal operator + bool operator==(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) == 0 ? 1 : 0; + } + return 0; + } + /// Utility functions - any& swap(any& x) { + uint8_t getHash() const + { + void* p1 = const_cast(object); + return *(uint64_t*)policy->get_value(&p1) % 4048; + } + any& swap(any& x) + { std::swap(policy, x.policy); std::swap(object, x.object); return *this; @@ -206,27 +263,32 @@ public: /// Cast operator. You can only cast to the original type. template - T& cast() { - if (policy != anyimpl::get_policy()) + T& cast() + { + if (policy != anyimpl::get_policy()) throw anyimpl::bad_any_cast(); T* r = reinterpret_cast(policy->get_value(&object)); return *r; } /// Returns true if the any contains no value. - bool empty() const { + bool empty() const + { return policy == anyimpl::get_policy(); } /// Frees any allocated memory, and sets the value to NULL. - void reset() { + void reset() + { policy->static_delete(&object); policy = anyimpl::get_policy(); } /// Returns true if the two types are the same. 
- bool compatible(const any& x) const { + bool compatible(const any& x) const + { return policy == x.policy; } }; + } diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 8d110cfc8..c1f5bbd63 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -215,6 +215,22 @@ inline string getStringNullValue() namespace rowgroup { +const std::string typeStr(""); +const static_any::any& RowAggregation::charTypeId((char)1); +const static_any::any& RowAggregation::scharTypeId((signed char)1); +const static_any::any& RowAggregation::shortTypeId((short)1); +const static_any::any& RowAggregation::intTypeId((int)1); +const static_any::any& RowAggregation::longTypeId((long)1); +const static_any::any& RowAggregation::llTypeId((long long)1); +const static_any::any& RowAggregation::ucharTypeId((unsigned char)1); +const static_any::any& RowAggregation::ushortTypeId((unsigned short)1); +const static_any::any& RowAggregation::uintTypeId((unsigned int)1); +const static_any::any& RowAggregation::ulongTypeId((unsigned long)1); +const static_any::any& RowAggregation::ullTypeId((unsigned long long)1); +const static_any::any& RowAggregation::floatTypeId((float)1); +const static_any::any& RowAggregation::doubleTypeId((double)1); +const static_any::any& RowAggregation::strTypeId(typeStr); + KeyStorage::KeyStorage(const RowGroup& keys, Row** tRow) : tmpRow(tRow), rg(keys) { RGData data(rg); @@ -691,7 +707,8 @@ RowAggregation::RowAggregation(const vector& rowAggGroupByCol RowAggregation::RowAggregation(const RowAggregation& rhs): fAggMapPtr(NULL), fRowGroupOut(NULL), fTotalRowCount(0), fMaxTotalRowCount(AGG_ROWGROUP_SIZE), - fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0) + fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0), + fRGContext(rhs.fRGContext) { //fGroupByCols.clear(); //fFunctionCols.clear(); @@ -756,7 +773,6 @@ void RowAggregation::addRowGroup(const RowGroup* pRows, vector& in { // this function is 
for threaded aggregation, which is for group by and distinct. // if (countSpecial(pRows)) - Row rowIn; pRows->initRow(&rowIn); @@ -790,7 +806,7 @@ void RowAggregation::setJoinRowGroups(vector* pSmallSideRG, RowGroup* } //------------------------------------------------------------------------------ -// For UDAF, we need to sometimes start a new context. +// For UDAF, we need to sometimes start a new fRGContext. // // This will be called any number of times by each of the batchprimitiveprocessor // threads on the PM and by multple threads on the UM. It must remain @@ -801,29 +817,29 @@ void RowAggregation::resetUDAF(uint64_t funcColID) // Get the UDAF class pointer and store in the row definition object. RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColID].get()); - // resetUDAF needs to be re-entrant. Since we're modifying the context object - // by creating a new userData, we need a local copy. The copy constructor - // doesn't copy userData. - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + // RowAggregation and it's functions need to be re-entrant which means + // each instance (thread) needs its own copy of the context object. + // Note: operator=() doesn't copy userData. + fRGContext = rowUDAF->fUDAFContext; // Call the user reset for the group userData. Since, at this point, // context's userData will be NULL, reset will generate a new one. 
mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->reset(&rgContext); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } fRow.setUserDataStore(fRowGroupOut->getRGData()->getUserDataStore()); - fRow.setUserData(rgContext, - rgContext.getUserDataSP(), - rgContext.getUserDataSize(), + fRow.setUserData(fRGContext, + fRGContext.getUserDataSP(), + fRGContext.getUserDataSize(), rowUDAF->fAuxColumnIndex); - rgContext.setUserData(NULL); // Prevents calling deleteUserData on the context. + fRGContext.setUserData(NULL); // Prevents calling deleteUserData on the fRGContext. } //------------------------------------------------------------------------------ @@ -873,7 +889,6 @@ void RowAggregation::initialize() } } - // Save the RowGroup data pointer fResultDataVec.push_back(fRowGroupOut->getRGData()); @@ -1658,10 +1673,11 @@ void RowAggregation::updateEntry(const Row& rowIn) { for (uint64_t i = 0; i < fFunctionCols.size(); i++) { - int64_t colIn = fFunctionCols[i]->fInputColumnIndex; - int64_t colOut = fFunctionCols[i]->fOutputColumnIndex; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[i]; + int64_t colIn = pFunctionCol->fInputColumnIndex; + int64_t colOut = pFunctionCol->fOutputColumnIndex; - switch (fFunctionCols[i]->fAggFunction) + switch (pFunctionCol->fAggFunction) { case ROWAGG_COUNT_COL_NAME: @@ -1675,7 +1691,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_MIN: case ROWAGG_MAX: case ROWAGG_SUM: - doMinMaxSum(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction); + doMinMaxSum(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; case ROWAGG_AVG: @@ -1692,7 +1708,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_BIT_OR: case ROWAGG_BIT_XOR: { - doBitOp(rowIn, 
colIn, colOut, fFunctionCols[i]->fAggFunction); + doBitOp(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; } @@ -1707,11 +1723,11 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); + RowUDAFFunctionCol* rowUDAF = dynamic_cast(pFunctionCol.get()); if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF); + doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF, i); } else { @@ -1725,7 +1741,7 @@ void RowAggregation::updateEntry(const Row& rowIn) { std::ostringstream errmsg; errmsg << "RowAggregation: function (id = " << - (uint64_t) fFunctionCols[i]->fAggFunction << ") is not supported."; + (uint64_t) pFunctionCol->fAggFunction << ") is not supported."; cerr << errmsg.str() << endl; throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); break; @@ -1997,131 +2013,142 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu } void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { - std::vector valsIn; - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupIn.getColTypes()[colIn]; - std::vector dataFlags; + int32_t paramCount = fRGContext.getParameterCount(); + // The vector of parameters to be sent to the UDAF + mcsv1sdk::ColumnDatum valsIn[paramCount]; + uint32_t dataFlags[paramCount]; - // Get the context for this rowGroup. Make a copy so we're thread safe. 
- mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); - - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - - if (isNull(&fRowGroupIn, rowIn, colIn) == true) + execplan::CalpontSystemCatalog::ColDataType colDataType; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + if (isNull(&fRowGroupIn, rowIn, colIn) == true) { - return; + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + { + return; + } + dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; + } + + colDataType = fRowGroupIn.getColTypes()[colIn]; + if (!fRGContext.isParamNull(i)) + { + switch (colDataType) + { + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + break; + } + + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UBIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + { + datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; + datum.columnData = rowIn.getDoubleField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + { + 
datum.dataType = execplan::CalpontSystemCatalog::FLOAT; + datum.columnData = rowIn.getFloatField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::TIME: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + { + datum.dataType = colDataType; + datum.columnData = rowIn.getStringField(colIn); + break; + } + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation " << fRGContext.getName() << + ": No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } } - flag |= mcsv1sdk::PARAM_IS_NULL; - } - - flags.push_back(flag); - rgContext.setDataFlags(&flags); - - mcsv1sdk::ColumnDatum datum; - - if (!rgContext.isParamNull(0)) - { - switch (colDataType) + // MCOL-1201: If there are multiple parameters, the next fFunctionCols + // will have the column used. By incrementing the funcColsIdx (passed by + // ref, we also increment the caller's index. 
+ if (fFunctionCols.size() > funcColsIdx + 1 + && fFunctionCols[funcColsIdx+1]->fAggFunction == ROWAGG_MULTI_PARM) { - case execplan::CalpontSystemCatalog::TINYINT: - case execplan::CalpontSystemCatalog::SMALLINT: - case execplan::CalpontSystemCatalog::MEDINT: - case execplan::CalpontSystemCatalog::INT: - case execplan::CalpontSystemCatalog::BIGINT: - case execplan::CalpontSystemCatalog::DECIMAL: - case execplan::CalpontSystemCatalog::UDECIMAL: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - datum.scale = fRowGroupIn.getScale()[colIn]; - datum.precision = fRowGroupIn.getPrecision()[colIn]; - break; - } - - case execplan::CalpontSystemCatalog::UTINYINT: - case execplan::CalpontSystemCatalog::USMALLINT: - case execplan::CalpontSystemCatalog::UMEDINT: - case execplan::CalpontSystemCatalog::UINT: - case execplan::CalpontSystemCatalog::UBIGINT: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DOUBLE: - case execplan::CalpontSystemCatalog::UDOUBLE: - { - datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; - datum.columnData = rowIn.getDoubleField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::FLOAT: - case execplan::CalpontSystemCatalog::UFLOAT: - { - datum.dataType = execplan::CalpontSystemCatalog::FLOAT; - datum.columnData = rowIn.getFloatField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DATE: - case execplan::CalpontSystemCatalog::DATETIME: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::TIME: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::CHAR: - case execplan::CalpontSystemCatalog::VARCHAR: - case 
execplan::CalpontSystemCatalog::TEXT: - case execplan::CalpontSystemCatalog::VARBINARY: - case execplan::CalpontSystemCatalog::CLOB: - case execplan::CalpontSystemCatalog::BLOB: - { - datum.dataType = colDataType; - datum.columnData = rowIn.getStringField(colIn); - break; - } - - default: - { - std::ostringstream errmsg; - errmsg << "RowAggregation " << rgContext.getName() << - ": No logic for data type: " << colDataType; - throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); - break; - } + ++funcColsIdx; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; + colIn = pFunctionCol->fInputColumnIndex; + colOut = pFunctionCol->fOutputColumnIndex; + } + else + { + break; } } - valsIn.push_back(datum); - // The intermediate values are stored in userData referenced by colAux. - rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setDataFlags(dataFlags); + fRGContext.setUserData(fRow.getUserData(colAux)); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->nextValue(&rgContext, valsIn); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -2218,6 +2245,7 @@ RowAggregationUM::RowAggregationUM(const RowAggregationUM& rhs) : fHasAvg(rhs.fHasAvg), fKeyOnHeap(rhs.fKeyOnHeap), fHasStatsFunc(rhs.fHasStatsFunc), + fHasUDAF(rhs.fHasUDAF), fExpression(rhs.fExpression), fTotalMemUsage(rhs.fTotalMemUsage), fRm(rhs.fRm), @@ -2419,7 +2447,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -2585,22 +2613,6 @@ void RowAggregationUM::calculateAvgColumns() // Sets the value 
from valOut into column colOut, performing any conversions. void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) { - static const static_any::any& charTypeId((char)1); - static const static_any::any& scharTypeId((signed char)1); - static const static_any::any& shortTypeId((short)1); - static const static_any::any& intTypeId((int)1); - static const static_any::any& longTypeId((long)1); - static const static_any::any& llTypeId((long long)1); - static const static_any::any& ucharTypeId((unsigned char)1); - static const static_any::any& ushortTypeId((unsigned short)1); - static const static_any::any& uintTypeId((unsigned int)1); - static const static_any::any& ulongTypeId((unsigned long)1); - static const static_any::any& ullTypeId((unsigned long long)1); - static const static_any::any& floatTypeId((float)1); - static const static_any::any& doubleTypeId((double)1); - static const std::string typeStr(""); - static const static_any::any& strTypeId(typeStr); - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; if (valOut.empty()) @@ -2609,6 +2621,179 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) return; } + int64_t intOut = 0; + uint64_t uintOut = 0; + float floatOut = 0.0; + double doubleOut = 0.0; + ostringstream oss; + std::string strOut; + + bool bSetSuccess = false; + switch (colDataType) + { + case execplan::CalpontSystemCatalog::BIT: + case execplan::CalpontSystemCatalog::TINYINT: + if (valOut.compatible(charTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(scharTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<1>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + if (valOut.compatible(shortTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<2>(intOut, colOut); + bSetSuccess = true; 
+ } + break; + + case execplan::CalpontSystemCatalog::INT: + if (valOut.compatible(uintTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(longTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<4>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + if (valOut.compatible(llTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<8>(intOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UTINYINT: + if (valOut.compatible(ucharTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<1>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + if (valOut.compatible(ushortTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<2>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UINT: + if (valOut.compatible(uintTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<4>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UBIGINT: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + if (valOut.compatible(floatTypeId)) + { + floatOut = valOut.cast(); + fRow.setFloatField(floatOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DOUBLE: + case 
execplan::CalpontSystemCatalog::UDOUBLE: + if (valOut.compatible(doubleTypeId)) + { + doubleOut = valOut.cast(); + fRow.setDoubleField(doubleOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setStringField(strOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setVarBinaryField(strOut, colOut); + bSetSuccess = true; + } + break; + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation: No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } + if (!bSetSuccess) + { + SetUDAFAnyValue(valOut, colOut); + } +} + +void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut) +{ + execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; + // This may seem a bit convoluted. Users shouldn't return a type // that they didn't set in mcsv1_UDAF::init(), but this // handles whatever return type is given and casts @@ -2814,7 +2999,7 @@ void RowAggregationUM::calculateUDAFColumns() continue; rowUDAF = dynamic_cast(fFunctionCols[i].get()); - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + fRGContext = rowUDAF->fUDAFContext; int64_t colOut = rowUDAF->fOutputColumnIndex; int64_t colAux = rowUDAF->fAuxColumnIndex; @@ -2826,26 +3011,26 @@ void RowAggregationUM::calculateUDAFColumns() fRowGroupOut->getRow(j, &fRow); // Turn the NULL flag off. We can't know NULL at this point - rgContext.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF evaluate function mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->evaluate(&rgContext, valOut); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); } - rgContext.setUserData(NULL); + fRGContext.setUserData(NULL); } } @@ -3116,54 +3301,60 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u { // For a NULL constant, call nextValue with NULL and then evaluate. bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. 
- rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } - +#if 0 + uint32_t dataFlags[fRGContext.getParameterCount()]; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + { + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + } +#endif // Turn the NULL and CONSTANT flags on. - std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a dummy datum - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = execplan::CalpontSystemCatalog::BIGINT; datum.columnData = 0; - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), 
logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3460,30 +3651,28 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData case ROWAGG_UDAF: { bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. - rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Turn the CONSTANT flags on. 
- std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a datum item for sending to UDAF - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = (CalpontSystemCatalog::ColDataType)colDataType; switch (colDataType) @@ -3567,27 +3756,27 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData break; } - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3806,7 +3995,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -4011,45 +4200,43 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance 
//------------------------------------------------------------------------------ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { static_any::any valOut; - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); // Get the user data boost::shared_ptr userData = rowIn.getUserData(colIn + 1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. - std::vector flags; - uint32_t flag = 0; + uint32_t flags[1]; + flags[0] = 0; if (!userData) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { return; } // Turn on NULL flags - flag |= mcsv1sdk::PARAM_IS_NULL; + flags[0] |= mcsv1sdk::PARAM_IS_NULL; } - flags.push_back(flag); - rgContext.setDataFlags(&flags); + fRGContext.setDataFlags(flags); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF subEvaluate method mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->subEvaluate(&rgContext, userData.get()); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userData.get()); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::IDBExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -4246,7 +4433,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index b6294f193..282f354fc 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -110,6 +110,9 @@ enum RowAggFunctionType // User Defined Aggregate Function ROWAGG_UDAF, + // If an Aggregate has more than one parameter, this will be used for parameters after the first + ROWAGG_MULTI_PARM, + // internal function type to avoid duplicate the work // handling ROWAGG_COUNT_NO_OP, ROWAGG_DUP_FUNCT and ROWAGG_DUP_AVG is a little different // ROWAGG_COUNT_NO_OP : count done by AVG, no need to copy @@ -583,7 +586,7 @@ protected: virtual void doAvg(const Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); - virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); @@ -660,6 +663,25 @@ 
protected: //need access to rowgroup storage holding the rows to hash & ==. friend class AggHasher; friend class AggComparator; + + // We need a separate copy for each thread. + mcsv1sdk::mcsv1Context fRGContext; + + // These are handy for testing the actual type of static_any for UDAF + static const static_any::any& charTypeId; + static const static_any::any& scharTypeId; + static const static_any::any& shortTypeId; + static const static_any::any& intTypeId; + static const static_any::any& longTypeId; + static const static_any::any& llTypeId; + static const static_any::any& ucharTypeId; + static const static_any::any& ushortTypeId; + static const static_any::any& uintTypeId; + static const static_any::any& ulongTypeId; + static const static_any::any& ullTypeId; + static const static_any::any& floatTypeId; + static const static_any::any& doubleTypeId; + static const static_any::any& strTypeId; }; //------------------------------------------------------------------------------ @@ -783,6 +805,9 @@ protected: // Sets the value from valOut into column colOut, performing any conversions. void SetUDAFValue(static_any::any& valOut, int64_t colOut); + // If the datatype returned by evaluate isn't what we expect, convert. + void SetUDAFAnyValue(static_any::any& valOut, int64_t colOut); + // calculate the UDAF function all rows received. UM only function. 
void calculateUDAFColumns(); @@ -877,7 +902,7 @@ protected: void doStatistics(const Row&, int64_t, int64_t, int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); - void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); bool countSpecial(const RowGroup* pRG) { return false; diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index e69ff4d88..01009e35a 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/udfsdk/allnull.cpp b/utils/udfsdk/allnull.cpp index b6b8d79da..247b9e28f 100644 --- a/utils/udfsdk/allnull.cpp +++ b/utils/udfsdk/allnull.cpp @@ -27,11 +27,11 @@ struct allnull_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode allnull::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { context->setUserDataSize(sizeof(allnull_data)); - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -52,8 +52,7 @@ mcsv1_UDAF::ReturnCode allnull::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { struct allnull_data* data = (struct allnull_data*)context->getUserData()->data; diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h index 86697b052..da17f5d6b 100644 
--- a/utils/udfsdk/allnull.h +++ b/utils/udfsdk/allnull.h @@ -103,7 +103,7 @@ public: * colTypes or wrong number of arguments. Else return * mcsv1_UDAF::SUCCESS. */ - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); /** * reset() @@ -138,7 +138,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() diff --git a/utils/udfsdk/avg_mode.cpp b/utils/udfsdk/avg_mode.cpp index f39b5e402..5429183d9 100644 --- a/utils/udfsdk/avg_mode.cpp +++ b/utils/udfsdk/avg_mode.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("avg_mode() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode avg_mode::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; @@ -187,8 +186,7 @@ mcsv1_UDAF::ReturnCode 
avg_mode::evaluate(mcsv1Context* context, static_any::any return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/avg_mode.h b/utils/udfsdk/avg_mode.h index 4f3442005..5722c5fea 100644 --- a/utils/udfsdk/avg_mode.h +++ b/utils/udfsdk/avg_mode.h @@ -18,7 +18,7 @@ /*********************************************************************** * $Id$ * -* mcsv1_UDAF.h +* avg_mode.h ***********************************************************************/ /** @@ -50,8 +50,8 @@ * is also used to describe the interface that is used for * either. */ -#ifndef HEADER_mode -#define HEADER_mode +#ifndef HEADER_avg_mode +#define HEADER_avg_mode #include #include @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index 349a642ec..ee08dcc07 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -36,6 +36,8 @@ UDAF_MAP UDAFMap::fm; #include "ssq.h" #include "median.h" #include "avg_mode.h" +#include "regr_avgx.h" +#include "avgx.h" UDAF_MAP& UDAFMap::getMap() { if (fm.size() > 0) @@ -52,6 +54,8 @@ UDAF_MAP& UDAFMap::getMap() fm["ssq"] = new ssq(); fm["median"] = new median(); fm["avg_mode"] = new avg_mode(); + fm["regr_avgx"] = new regr_avgx(); + fm["avgx"] = new avgx(); return fm; } @@ -115,8 +119,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const { // We don't test the per row data fields. They don't determine // if it's the same Context. - if (getName() != c.getName() - || fRunFlags != c.fRunFlags + if (getName() != c.getName() + ||fRunFlags != c.fRunFlags || fContextFlags != c.fContextFlags || fUserDataSize != c.fUserDataSize || fResultType != c.fResultType @@ -125,7 +129,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const || fStartFrame != c.fStartFrame || fEndFrame != c.fEndFrame || fStartConstant != c.fStartConstant - || fEndConstant != c.fEndConstant) + || fEndConstant != c.fEndConstant + || fParamCount != c.fParamCount) return false; return true; @@ -217,6 +222,7 @@ void mcsv1Context::serialize(messageqcpp::ByteStream& b) const b << (uint32_t)fEndFrame; b << fStartConstant; b << fEndConstant; + b << fParamCount; } void mcsv1Context::unserialize(messageqcpp::ByteStream& b) @@ -238,6 +244,7 @@ void mcsv1Context::unserialize(messageqcpp::ByteStream& b) fEndFrame = (WF_FRAME)frame; b >> fStartConstant; b >> fEndConstant; + b >> fParamCount; } void UserData::serialize(messageqcpp::ByteStream& bs) const diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h 
index d24852c28..df3f47649 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -77,6 +77,7 @@ #include "any.hpp" #include "calpontsystemcatalog.h" #include "wf_frame.h" +#include "my_decimal_limits.h" using namespace execplan; @@ -200,12 +201,8 @@ static uint64_t CONTEXT_IS_PM __attribute__ ((unused)) = 1 << 2; // Flags that describe the contents of a specific input parameter // These will be set in context->dataFlags for each method call by the framework. // User code shouldn't use these directly -static uint64_t PARAM_IS_NULL __attribute__ ((unused)) = 1; -static uint64_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; - -// shorthand for the list of columns in the call sent to init() -// first is the actual column name and second is the data type in Columnstore. -typedef std::vector >COL_TYPES; +static uint32_t PARAM_IS_NULL __attribute__ ((unused)) = 1; +static uint32_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; // This is the context class that is passed to all API callbacks // The framework potentially sets data here for each invocation of @@ -269,7 +266,9 @@ public: EXPORT bool isPM(); // Parameter refinement description accessors - // valid in nextValue and dropValue + + // How many actual parameters were entered. + // valid in all calls size_t getParameterCount() const; // Determine if an input parameter is NULL @@ -298,6 +297,7 @@ public: // This only makes sense if the return type is decimal, but should be set // to (0, -1) for other types if the inout is decimal. // valid in init() + // Set the scale to DECIMAL_NOT_SPECIFIED if you want a floating decimal. 
EXPORT bool setScale(int32_t scale); EXPORT bool setPrecision(int32_t precision); @@ -372,7 +372,7 @@ private: int32_t fResultscale; // For scale, the number of digits to the right of the decimal int32_t fResultPrecision; // The max number of digits allowed in the decimal value std::string errorMsg; - std::vector* dataFlags; // one entry for each parameter + uint32_t* dataFlags; // an integer array with one entry for each parameter bool* bInterrupted; // Gets set to true by the Framework if something happens WF_FRAME fStartFrame; // Is set to default to start, then modified by the actual frame in the call WF_FRAME fEndFrame; // Is set to default to start, then modified by the actual frame in the call @@ -380,6 +380,7 @@ private: int32_t fEndConstant; // for end frame WF_PRECEEDIMG or WF_FOLLOWING std::string functionName; mcsv1sdk::mcsv1_UDAF* func; + int32_t fParamCount; public: // For use by the framework @@ -394,13 +395,14 @@ public: EXPORT void clearContextFlag(uint64_t flag); EXPORT uint64_t getContextFlags() const; EXPORT uint32_t getUserDataSize() const; - EXPORT std::vector& getDataFlags(); - EXPORT void setDataFlags(std::vector* flags); + EXPORT uint32_t* getDataFlags(); + EXPORT void setDataFlags(uint32_t* flags); EXPORT void setInterrupted(bool interrupted); EXPORT void setInterrupted(bool* interrupted); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction(); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const; EXPORT boost::shared_ptr getUserDataSP(); + EXPORT void setParamCount(int32_t paramCount); }; // Since aggregate functions can operate on any data type, we use the following structure @@ -419,9 +421,10 @@ public: struct ColumnDatum { CalpontSystemCatalog::ColDataType dataType; // defined in calpontsystemcatalog.h - static_any::any columnData; + static_any::any columnData; // Not valid in init() uint32_t scale; // If dataType is a DECIMAL type uint32_t precision; // If dataType is a DECIMAL type + std::string alias; // Only filled in for init() ColumnDatum() :
dataType(CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1) {}; }; @@ -466,7 +469,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes) = 0; + ColumnDatum* colTypes) = 0; /** * reset() @@ -501,8 +504,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn) = 0; + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn) = 0; /** * subEvaluate() @@ -579,8 +581,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() @@ -640,32 +641,32 @@ inline mcsv1Context::mcsv1Context() : fEndFrame(WF_CURRENT_ROW), fStartConstant(0), fEndConstant(0), - func(NULL) + func(NULL), + fParamCount(0) { } inline mcsv1Context::mcsv1Context(const mcsv1Context& rhs) : - fContextFlags(0), - fColWidth(0), - dataFlags(NULL), - bInterrupted(NULL), - func(NULL) + dataFlags(NULL) { copy(rhs); } inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs) { - fRunFlags = rhs.getRunFlags(); - fResultType = rhs.getResultType(); - fUserDataSize = rhs.getUserDataSize(); - fResultscale = rhs.getScale(); - fResultPrecision = rhs.getPrecision(); + fRunFlags = rhs.fRunFlags; + fContextFlags = rhs.fContextFlags; + fResultType = rhs.fResultType; + fUserDataSize = rhs.fUserDataSize; + fColWidth = rhs.fColWidth; + fResultscale = rhs.fResultscale; + fResultPrecision = rhs.fResultPrecision; rhs.getStartFrame(fStartFrame, fStartConstant); rhs.getEndFrame(fEndFrame, fEndConstant); - functionName = rhs.getName(); - bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference - func = rhs.func; + functionName = rhs.functionName; + bInterrupted = rhs.bInterrupted; // Multiple 
threads will use the same reference + func = rhs.func; + fParamCount = rhs.fParamCount; return *this; } @@ -675,11 +676,7 @@ inline mcsv1Context::~mcsv1Context() inline mcsv1Context& mcsv1Context::operator=(const mcsv1Context& rhs) { - fContextFlags = 0; - fColWidth = 0; dataFlags = NULL; - bInterrupted = NULL; - func = NULL; return copy(rhs); } @@ -753,16 +750,13 @@ inline bool mcsv1Context::isPM() inline size_t mcsv1Context::getParameterCount() const { - if (dataFlags) - return dataFlags->size(); - - return 0; + return fParamCount; } inline bool mcsv1Context::isParamNull(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_NULL; + return dataFlags[paramIdx] & PARAM_IS_NULL; return false; } @@ -770,7 +764,7 @@ inline bool mcsv1Context::isParamNull(int paramIdx) inline bool mcsv1Context::isParamConstant(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_CONSTANT; + return dataFlags[paramIdx] & PARAM_IS_CONSTANT; return false; } @@ -939,18 +933,22 @@ inline uint32_t mcsv1Context::getUserDataSize() const return fUserDataSize; } -inline std::vector& mcsv1Context::getDataFlags() +inline uint32_t* mcsv1Context::getDataFlags() { - return *dataFlags; + return dataFlags; } -inline void mcsv1Context::setDataFlags(std::vector* flags) +inline void mcsv1Context::setDataFlags(uint32_t* flags) { dataFlags = flags; } -inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, - std::vector& valsDropped) +inline void mcsv1Context::setParamCount(int32_t paramCount) +{ + fParamCount = paramCount; +} + +inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { return NOT_IMPLEMENTED; } diff --git a/utils/udfsdk/median.cpp b/utils/udfsdk/median.cpp index e32d721f1..9c7e72dc3 100644 --- a/utils/udfsdk/median.cpp +++ b/utils/udfsdk/median.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* 
colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("median() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode median::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; @@ -212,8 +211,7 @@ mcsv1_UDAF::ReturnCode median::evaluate(mcsv1Context* context, static_any::any& return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h index d64792461..142be6ba8 100644 --- a/utils/udfsdk/median.h +++ b/utils/udfsdk/median.h @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. 
*/ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/ssq.cpp b/utils/udfsdk/ssq.cpp index 4d9ef7e10..20fdc33db 100644 --- a/utils/udfsdk/ssq.cpp +++ b/utils/udfsdk/ssq.cpp @@ -34,9 +34,9 @@ struct ssq_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -44,13 +44,13 @@ mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("ssq() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -81,8 +81,7 @@ mcsv1_UDAF::ReturnCode ssq::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; @@ -183,8 +182,7 @@ mcsv1_UDAF::ReturnCode ssq::evaluate(mcsv1Context* context, static_any::any& val return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode 
ssq::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h index 514c7a3f0..2cac61c2c 100644 --- a/utils/udfsdk/ssq.h +++ b/utils/udfsdk/ssq.h @@ -114,7 +114,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -147,8 +147,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -224,8 +223,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); protected: }; diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp index 981651c43..dc0277ccc 100644 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -490,6 +490,168 @@ extern "C" // return data->sumsq; return 0; } + +//======================================================================= + + /** + * regr_avgx connector stub + */ + struct regr_avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_avgx_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_avgx() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + 
data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x and y + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } + +//======================================================================= + + /** + * avgx connector stub. Exactly the same functionality as the + * built in avg() function. 
Use to test the performance of the + * API + */ + struct avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct avgx_data* data; + if (args->arg_count != 1) + { + strcpy(message,"avgx() requires one argument"); + return 1; + } + + if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x and y + struct avgx_data* data = (struct avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } } // vim:ts=4 sw=4: diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index 664b0e7de..fe1f3fd0e 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -204,8 +204,10 @@ Filters="*.c;*.C;*.cc;*.cpp;*.cp;*.cxx;*.c++;*.prg;*.pas;*.dpr;*.asm;*.s;*.bas;*.java;*.cs;*.sc;*.e;*.cob;*.html;*.rc;*.tcl;*.py;*.pl;*.d"> + + @@ -215,8 +217,10 @@ 
Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + + diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index f302c49cd..5cd5243c5 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -52,6 +52,7 @@ using namespace joblist; namespace windowfunction { + template boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) { @@ -142,7 +143,7 @@ template void WF_udaf::resetData() { getContext().getFunction()->reset(&getContext()); - fSet.clear(); + fDistinctSet.clear(); WindowFunctionType::resetData(); } @@ -150,8 +151,8 @@ template void WF_udaf::parseParms(const std::vector& parms) { bRespectNulls = true; - // parms[1]: respect null | ignore null - ConstantColumn* cc = dynamic_cast(parms[1].get()); + // The last parms: respect null | ignore null + ConstantColumn* cc = dynamic_cast(parms[parms.size()-1].get()); idbassert(cc != NULL); bool isNull = false; // dummy, harded coded bRespectNulls = (cc->getIntVal(fRow, isNull) > 0); @@ -167,52 +168,71 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) } mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - uint64_t colOut = fFieldIndex[0]; - uint64_t colIn = fFieldIndex[1]; - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. 
+ getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); + + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } for (int64_t i = b; i < e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; + bool bHasNull = false; fRow.setData(getPointer(fRowData->at(i))); // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; + uint32_t flags[getContext().getParameterCount()]; - if (fRow.isNullValue(colIn) == true) + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) { - continue; + if (!bRespectNulls) + { + bHasNull = true; + break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - flag |= mcsv1sdk::PARAM_IS_NULL; + T valIn; + getValue(colIn, valIn, &datum.dataType); + + // Check for distinct, if turned on. + // Currently, distinct only works for param 1 + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. - // TODO: when we impliment distinct, we need to revist this. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) + if (bHasNull) { continue; } - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - rc = getContext().getFunction()->dropValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED) @@ -442,59 +462,191 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) else if (fPrev <= e && fPrev > c) e = c; - uint64_t colIn = fFieldIndex[1]; + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. + getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } if (b <= c && c <= e) getContext().setContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); else getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); - + bool bHasNull = false; for (int64_t i = b; i <= e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; fRow.setData(getPointer(fRowData->at(i))); - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - if (fRow.isNullValue(colIn) == true) + // NULL flags + uint32_t flags[getContext().getParameterCount()]; + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + + // Turn on Null flags or skip based on respect nulls + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) + { + if (!bRespectNulls) + { + bHasNull = true; + 
break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; + } + + // MCOL-1201 Multi-Paramter calls + switch (datum.dataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + { + int64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::UDECIMAL: + { + uint64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + double valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + float valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[i] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } + } + // Skip if any value is NULL and respect nulls is off. + if (bHasNull) { continue; } - - flag |= mcsv1sdk::PARAM_IS_NULL; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) - { - continue; - } - - if (fDistinct) - fSet.insert(valIn); - - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - + getContext().setDataFlags(flags); + rc = getContext().getFunction()->nextValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index babb32565..f7a4c4b08 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -21,13 +21,35 @@ #ifndef UTILS_WF_UDAF_H #define UTILS_WF_UDAF_H -#include +#ifndef _MSC_VER +#include +#else +#include +#endif #include "windowfunctiontype.h" #include "mcsv1_udaf.h" namespace windowfunction { +// Hash classes for the distinct hashmap +class DistinctHasher +{ +public: + inline size_t operator()(const static_any::any& a) const + { + return a.getHash(); + } +}; + +class DistinctEqual +{ +public: + inline bool operator()(const static_any::any& lhs, static_any::any& rhs) const + { + return lhs == rhs; + } +}; // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF @@ -72,7 +94,8 @@ protected: bool fDistinct; bool bRespectNulls; // respect null | ignore null bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue. 
- std::set fSet; // To hold distinct values + // To hold distinct values + std::tr1::unordered_set fDistinctSet; static_any::any fValOut; // The return value public: diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index 950045899..4c5b4de32 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -492,10 +492,10 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) static uint64_t dateNull = joblist::DATENULL; static uint64_t datetimeNull = joblist::DATETIMENULL; static uint64_t timeNull = joblist::TIMENULL; - static uint64_t char1Null = joblist::CHAR1NULL; - static uint64_t char2Null = joblist::CHAR2NULL; - static uint64_t char4Null = joblist::CHAR4NULL; - static uint64_t char8Null = joblist::CHAR8NULL; +// static uint64_t char1Null = joblist::CHAR1NULL; +// static uint64_t char2Null = joblist::CHAR2NULL; +// static uint64_t char4Null = joblist::CHAR4NULL; +// static uint64_t char8Null = joblist::CHAR8NULL; static string stringNull(""); void* v = NULL; diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 5d3dfec85..41c788693 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1280,7 +1280,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -2024,10 +2024,10 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 0; k < colStructList.size(); k++) + for (size_t k = 0; k < colStructList.size(); k++) { // Skip the selected column 
- if (k == colId) + if (k == (size_t)colId) continue; Column expandCol; @@ -2582,7 +2582,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -3277,7 +3277,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; From 30f9aa71cd1c14b3dc612ab9c518655669d3090d Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:53:49 -0500 Subject: [PATCH 02/19] MCOL-1201 Add test UDAF back in after rebase --- utils/udfsdk/avgx.cpp | 257 +++++++++++++++++++++++++++++++++++ utils/udfsdk/avgx.h | 99 ++++++++++++++ utils/udfsdk/regr_avgx.cpp | 270 +++++++++++++++++++++++++++++++++++++ utils/udfsdk/regr_avgx.h | 99 ++++++++++++++ 4 files changed, 725 insertions(+) create mode 100644 utils/udfsdk/avgx.cpp create mode 100644 utils/udfsdk/avgx.h create mode 100644 utils/udfsdk/regr_avgx.cpp create mode 100644 utils/udfsdk/regr_avgx.h diff --git a/utils/udfsdk/avgx.cpp b/utils/udfsdk/avgx.cpp new file mode 100644 index 000000000..887a8418e --- /dev/null +++ b/utils/udfsdk/avgx.cpp @@ -0,0 +1,257 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include +#include "avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 1) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with other than 1 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[0].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[0].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode avgx::reset(mcsv1Context* context) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_x = valsIn[0].columnData; + struct avgx_data* data = (struct 
avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct avgx_data* outData = (struct avgx_data*)context->getUserData()->data; + struct avgx_data* inData = (struct avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + + valOut = data->sum / (double)data->cnt; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_x = valsDropped[0].columnData; + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/avgx.h b/utils/udfsdk/avgx.h new file mode 100644 index 000000000..0569b6091 --- /dev/null +++ b/utils/udfsdk/avgx.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* avgx.h +***********************************************************************/ + +/** + * Columnstore interface for for the avgx function + * + * + * CREATE AGGREGATE FUNCTION avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_avgx +#define HEADER_avgx + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+ +// Return the avgx value of the dataset + +class avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + avgx() : mcsv1_UDAF() {}; + virtual ~avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_.h + diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/udfsdk/regr_avgx.cpp new file mode 100644 index 000000000..c7cc5b56e --- /dev/null +++ b/utils/udfsdk/regr_avgx.cpp @@ -0,0 +1,270 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include +#include +#include +#include "regr_avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct regr_avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode regr_avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 2) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with other than 2 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(regr_avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[1].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode regr_avgx::reset(mcsv1Context* context) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_y = valsIn[0].columnData; + static_any::any& valIn_x = valsIn[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (context->isParamNull(0) || context->isParamNull(1)) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct regr_avgx_data* outData = (struct regr_avgx_data*)context->getUserData()->data; + struct regr_avgx_data* inData = (struct regr_avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + + if (data->cnt == 0) + { + valOut = 0; + } + else + { + valOut = data->sum / (double)data->cnt; + } + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_y = valsDropped[0].columnData; + static_any::any& valIn_x = valsDropped[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/regr_avgx.h b/utils/udfsdk/regr_avgx.h new file mode 100644 index 000000000..f70f30d8c --- /dev/null +++ b/utils/udfsdk/regr_avgx.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* regr_avgx.h +***********************************************************************/ + +/** + * Columnstore interface for the regr_avgx function + * + * + * CREATE AGGREGATE FUNCTION regr_avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_regr_avgx +#define HEADER_regr_avgx + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in context->setErrorMessage() is returned to MariaDB.
+ +// Return the regr_avgx value of the dataset + +class regr_avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + regr_avgx() : mcsv1_UDAF() {}; + virtual ~regr_avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_regr_avgx.h + From 6ccfbb2a236f8106b93734d018899f3653d9b526 Mon Sep 17 00:00:00 2001 From: David Hall Date: Mon, 14 May 2018 17:28:24 -0500 Subject: [PATCH 03/19] MCOL-1201 some fixes from testing --- dbcon/joblist/tupleaggregatestep.cpp | 229 ++++++++++++--------------- dbcon/mysql/ha_calpont_execplan.cpp | 1 - utils/common/common.vpj | 2 + utils/rowgroup/rowaggregation.cpp | 4 +- 4 files changed, 106 insertions(+), 130 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 21c7c0af6..00fa26a4c 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -852,7 +852,6 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) if (ac->aggOp() == ROWAGG_UDAF) { UDAFColumn* udafc = dynamic_cast(ac); - if (udafc) { constAggDataVec.push_back( @@ -1097,8 +1096,9 @@ void TupleAggregateStep::prep1PhaseAggregate( vector functionVec; uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); + // For UDAF uint32_t projColsUDAFIndex = 0; - + UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function map avgFuncMap; @@ -1287,12 +1287,10 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { 
std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -1300,12 +1298,10 @@ void TupleAggregateStep::prep1PhaseAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, i)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -1474,8 +1470,6 @@ void TupleAggregateStep::prep1PhaseAggregate( throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } - pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); - // Return column oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(key); @@ -1677,8 +1671,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); // map key = column key, operation (enum), and UDAF pointer if UDAF. 
AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; set avgSet; + + // For UDAF + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; uint32_t projColsUDAFIndex = 0; // for count column of average function @@ -1847,7 +1844,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; if (udafc) @@ -1857,12 +1854,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAgg)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -2142,6 +2137,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { + udafc = NULL; pUDAFFunc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); @@ -2150,10 +2146,21 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } if
(find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -2473,26 +2480,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); } else { @@ -2904,7 +2892,10 @@ void TupleAggregateStep::prep2PhasesAggregate( vector > aggColVec; set avgSet; vector >& returnedColVec = jobInfo.returnedColVec; + // For UDAF uint32_t projColsUDAFIndex = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2947,7 +2938,6 @@ void TupleAggregateStep::prep2PhasesAggregate( uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns @@ -3084,12 +3074,10 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3098,10 +3086,9 @@ void 
TupleAggregateStep::prep2PhasesAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -3350,10 +3337,6 @@ void TupleAggregateStep::prep2PhasesAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { - // Keep a count of the parms after the first for any aggregate. - // These will be skipped and the count needs to be subtracted - // from where the aux column will be. - int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap; @@ -3369,6 +3352,8 @@ void TupleAggregateStep::prep2PhasesAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { uint32_t retKey = returnedColVec[i].first; @@ -3379,19 +3364,30 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_MULTI_PARM) { // Skip on UM: Extra parms for an aggregate have no work on the UM - ++multiParms; continue; } + // Is this a UDAF? use the function as part of the key. 
- - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; - + pUDAFFunc = NULL; + udafc = NULL; if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); @@ -3492,7 +3488,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep2PhasesAggregate: " << emsg << " oid=" @@ -3514,7 +3510,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3525,7 +3521,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.distinctColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3534,7 +3530,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + 
ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -3542,30 +3538,11 @@ void TupleAggregateStep::prep2PhasesAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3600,6 +3577,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnedColVec[i].second == AggregateColumn::AVG) avgFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // now fix the AVG function, locate the count(column) position @@ -3617,7 +3595,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size() - multiParms; + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3724,7 +3702,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector > aggColVec, aggNoDistColVec; set avgSet, avgDistSet; vector >& returnedColVec = jobInfo.returnedColVec; + // For UDAF uint32_t projColsUDAFIndex = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -3796,7 
+3777,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns @@ -3940,12 +3920,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3954,10 +3932,9 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -4201,32 +4178,33 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // associate the columns between the aggregate RGs on PM and UM without distinct aggregator // populated the returned columns { + int64_t multiParms = 0; + for (uint32_t idx = 0; idx < groupByPm.size(); idx++) { SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(idx, idx)); groupByUm.push_back(groupby); } - // Keep a count of the parms after the first for any aggregate. - // These will be skipped and the count needs to be subtracted - // from where the aux column will be. 
- int64_t multiParms = 0; for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) - { SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; - // UDAF support if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) { - // Multi-Parm is not used on the UM + // Skip on UM: Extra parms for an aggregate have no work on the UM ++multiParms; continue; } + if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); + if (!udafFuncCol) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + } funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, @@ -4273,6 +4251,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // These will be skipped and the count needs to be subtracted // from where the aux column will be. int64_t multiParms = 0; + projColsUDAFIndex = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. 
map avgFuncMap, avgDistFuncMap; @@ -4286,9 +4265,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { pUDAFFunc = NULL; + udafc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); @@ -4304,10 +4286,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -4436,7 +4429,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second - multiParms; + colUm = it->second; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); @@ -4460,7 +4453,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - colUm = it->second - multiParms; + colUm = it->second; if (aggOp == ROWAGG_SUM) { @@ -4528,7 +4521,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, 
args); cerr << "prep2PhasesDistinctAggregate: " << emsg << " oid=" @@ -4552,7 +4545,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -4561,7 +4554,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -4569,30 +4562,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. 
- funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4629,6 +4603,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -4646,7 +4621,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size() - multiParms; + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4706,7 +4681,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(5)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 86dc0bd2f..ec08223b0 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4570,7 +4570,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) udafc->resultType(ct); } } - } catch (std::logic_error e) { diff --git a/utils/common/common.vpj b/utils/common/common.vpj index 69059884c..ea67e04ba 100755 --- a/utils/common/common.vpj +++ 
b/utils/common/common.vpj @@ -200,6 +200,7 @@ + @@ -208,6 +209,7 @@ Name="Header Files" Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index c1f5bbd63..043dcaac2 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -2015,13 +2015,13 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { - int32_t paramCount = fRGContext.getParameterCount(); + uint32_t paramCount = fRGContext.getParameterCount(); // The vector of parameters to be sent to the UDAF mcsv1sdk::ColumnDatum valsIn[paramCount]; uint32_t dataFlags[paramCount]; execplan::CalpontSystemCatalog::ColDataType colDataType; - for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + for (uint32_t i = 0; i < paramCount; ++i) { mcsv1sdk::ColumnDatum& datum = valsIn[i]; // Turn on NULL flags From 1c7ec0ddc6fe8f74c44cee1bc773f59e76cc37c8 Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 15 May 2018 13:15:45 -0500 Subject: [PATCH 04/19] MCOL-1201 Modify docs. 
Fix group concat bug --- dbcon/mysql/ha_calpont_execplan.cpp | 1 + utils/udfsdk/docs/source/changelog.rst | 1 + .../docs/source/reference/ColumnDatum.rst | 6 ++-- .../docs/source/reference/MariaDBUDAF.rst | 2 +- .../udfsdk/docs/source/reference/UDAFMap.rst | 2 +- .../docs/source/reference/mcsv1Context.rst | 2 +- .../docs/source/reference/mcsv1_UDAF.rst | 36 ++++++++----------- utils/udfsdk/docs/source/usage/cmakelists.rst | 2 +- utils/udfsdk/docs/source/usage/compile.rst | 2 +- utils/udfsdk/docs/source/usage/headerfile.rst | 6 ++-- .../udfsdk/docs/source/usage/introduction.rst | 4 +-- utils/udfsdk/docs/source/usage/sourcefile.rst | 29 +++++++-------- utils/udfsdk/udfsdk.vpj | 33 +++++++++++++++++ 13 files changed, 75 insertions(+), 51 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index ec08223b0..701e1c14f 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4162,6 +4162,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) rowCol->columnVec(selCols); (dynamic_cast(ac))->orderCols(orderCols); parm.reset(rowCol); + ac->aggParms().push_back(parm); if (gc->str_separator()) { diff --git a/utils/udfsdk/docs/source/changelog.rst b/utils/udfsdk/docs/source/changelog.rst index fcd93d54c..1a7c749f9 100644 --- a/utils/udfsdk/docs/source/changelog.rst +++ b/utils/udfsdk/docs/source/changelog.rst @@ -5,4 +5,5 @@ Version History | Version | Date | Changes | +=========+============+=============================+ | 1.1.0α | 2017-08-25 | - First alpha release | +| 1.2.0α | 2018-05-18 | - Add multi parm support | +---------+------------+-----------------------------+ diff --git a/utils/udfsdk/docs/source/reference/ColumnDatum.rst b/utils/udfsdk/docs/source/reference/ColumnDatum.rst index dd1006363..5304a2953 100644 --- a/utils/udfsdk/docs/source/reference/ColumnDatum.rst +++ b/utils/udfsdk/docs/source/reference/ColumnDatum.rst @@ -1,3 +1,5 @@ +..
_ColumnDatum: + ColumnDatum =========== @@ -13,7 +15,7 @@ Example for int data: int myint = valIn.cast<int>(); -For multi-paramter aggregations (not available in Columnstore 1.1), the colsIn vector of next_value() contains the ordered set of row parameters. +For multi-parameter aggregations (not available in Columnstore 1.1), the colsIn array of next_value() contains the ordered set of row parameters. For char, varchar, text, varbinary and blob types, columnData will be std::string. @@ -59,7 +61,7 @@ The provided values are: * - SMALLINT - A signed two byte integer * - DECIMAL - - A Columnstore Decimal value. For Columnstore 1.1, this is stored in the smallest integer type field that will hold the required precision. + - A Columnstore Decimal value. This is stored in the smallest integer type field that will hold the required precision. * - MEDINT - A signed four byte integer * - INT diff --git a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst index 1f6fa7acb..d031705d8 100644 --- a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst +++ b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst @@ -13,7 +13,7 @@ The library placed in mysql/lib is the name you use in the SQL CREATE AGGREGATE CREATE AGGREGATE FUNCTION ssq returns REAL soname 'libudf_mysql.so'; -Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures. If writing your function for other engines, you must handle allocation and de-alloaction in :ref:`function_init ` and :ref:`function_deinit ` +Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures in other engines.
If writing your function for other engines, you must handle allocation and de-allocation in :ref:`function_init ` and :ref:`function_deinit ` All of the MariaDB UDF and UDAF example functions are in a single source file named udfmysql.cpp and linked into libudf_mysql.so. diff --git a/utils/udfsdk/docs/source/reference/UDAFMap.rst b/utils/udfsdk/docs/source/reference/UDAFMap.rst index 48706bab3..d3cda63f4 100644 --- a/utils/udfsdk/docs/source/reference/UDAFMap.rst +++ b/utils/udfsdk/docs/source/reference/UDAFMap.rst @@ -3,7 +3,7 @@ UDAFMap ======= -The UDAFMap is where we tell the system about our function. For Columnstore 1.1, you must manually place your function into this map. +The UDAFMap is where we tell the system about our function. For Columnstore 1.2, you must manually place your function into this map. * open mcsv1_udaf.cpp * add your header to the #include list diff --git a/utils/udfsdk/docs/source/reference/mcsv1Context.rst b/utils/udfsdk/docs/source/reference/mcsv1Context.rst index 279220fb3..02adf57ab 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1Context.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1Context.rst @@ -150,7 +150,7 @@ Use these to determine the way your UDA(n)F was called .. c:function:: size_t getParameterCount() const; -:returns: the number of parameters to the function in the SQL query. Columnstore 1.1 only supports one parameter. +:returns: the number of parameters to the function in the SQL query. .. c:function:: bool isParamNull(int paramIdx); diff --git a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst index 73c8f6570..f75fe73fc 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst @@ -1,4 +1,4 @@ -.. _ mcsv1_udaf: +.. _mcsv1_udaf: mcsv1_UDAF ========== @@ -11,12 +11,14 @@ The base class has no data members. It is designed to be only a container for yo However, adding static const members makes sense.
-For UDAF (not Wndow Functions) Aggregation takes place in three stages: +For UDAF (not Window Functions) Aggregation takes place in three stages: * Subaggregation on the PM. nextValue() * Consolodation on the UM. subevaluate() * Evaluation of the function on the UM. evaluate() +There are situations where the system makes a choice to perform all UDAF calculations on the UM. The presence of group_concat() in the query and certain joins can cause the optimizer to make this choice. + For Window Functions, all aggregation occurs on the UM, and thus the subevaluate step is skipped. There is an optional dropValue() function that may be added. * Aggregation on the UM. nextValue() @@ -80,17 +82,11 @@ Callback Methods .. _init: -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. - - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. +:param colTypes: A list of ColumnDatum structures. Use this to access the column types of the parameters. colTypes.columnData will be invalid. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -116,25 +112,23 @@ Callback Methods .. _nextvalue: -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. - +:param valsIn: an array representing the values to be added for each parameter for this row. 
+ :returns: ReturnCode::ERROR or ReturnCode::SUCCESS Use context->getUserData() and type cast it to your UserData type or Simple Data Model stuct. nextValue() is called for each Window movement that passes the WHERE and HAVING clauses. The context's UserData will contain values that have been sub-aggregated to this point for the group, partition or Window Frame. nextValue is called on the PM for aggregation and on the UM for Window Functions. - When used in an aggregate, the function may not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. + When used in an aggregate, the function should not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. - When used as a analytic function (Window Function), nextValue is call for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. + When used as an analytic function (Window Function), nextValue is called for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. - Since this is called for every row, it is important that this method be efficient. + Since this may be called for every row, it is important that this method be efficient. .. _subevaluate: @@ -172,13 +166,11 @@ Callback Methods .. _dropvalue: -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call -:param valsDropped: a vector representing the values to be dropped for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one.
+:param valsDropped: an array representing the values to be dropped for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS diff --git a/utils/udfsdk/docs/source/usage/cmakelists.rst b/utils/udfsdk/docs/source/usage/cmakelists.rst index 32a218459..a7ddacbaf 100644 --- a/utils/udfsdk/docs/source/usage/cmakelists.rst +++ b/utils/udfsdk/docs/source/usage/cmakelists.rst @@ -3,7 +3,7 @@ CMakeLists.txt ============== -For Columnstore 1.1, you compile your function by including it in the CMakeLists.txt file for the udfsdk. +For Columnstore 1.2, you compile your function by including it in the CMakeLists.txt file for the udfsdk. You need only add the new .cpp files to the udfsdk_LIB_SRCS target list:: diff --git a/utils/udfsdk/docs/source/usage/compile.rst b/utils/udfsdk/docs/source/usage/compile.rst index e6319e45b..b96af5d80 100644 --- a/utils/udfsdk/docs/source/usage/compile.rst +++ b/utils/udfsdk/docs/source/usage/compile.rst @@ -3,7 +3,7 @@ Compile ======= -To compile your function for Columnstore 1.1, simple recompile the udfsdk directory:: +To compile your function for Columnstore 1.2, simply recompile the udfsdk directory:: cd utils/usdsdk cmake . diff --git a/utils/udfsdk/docs/source/usage/headerfile.rst b/utils/udfsdk/docs/source/usage/headerfile.rst index 720acc5be..afb043e98 100644 --- a/utils/udfsdk/docs/source/usage/headerfile.rst +++ b/utils/udfsdk/docs/source/usage/headerfile.rst @@ -5,7 +5,7 @@ Header file Usually, each UDA(n)F function will have one .h and one .cpp file plus code for the mariadb UDAF plugin which may or may not be in a separate file. It is acceptable to put a set of related functions in the same files or use separate files for each. -The easiest way to create these files is to copy them an example closest to the type of function you intend to create. +The easiest way to create these files is to copy them from an example closest to the type of function you intend to create. 
Your header file must have a class defined that will implement your function. This class must be derived from mcsv1_UDAF and be in the mcsv1sdk namespace. The following examples use the "allnull" UDAF. @@ -29,9 +29,9 @@ allnull uses the Simple Data Model. See :ref:`complexdatamodel` to see how that allnull() : mcsv1_UDAF(){}; virtual ~allnull(){}; - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); virtual ReturnCode reset(mcsv1Context* context); - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn); virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); }; diff --git a/utils/udfsdk/docs/source/usage/introduction.rst b/utils/udfsdk/docs/source/usage/introduction.rst index 6b3544a1e..19c612caa 100644 --- a/utils/udfsdk/docs/source/usage/introduction.rst +++ b/utils/udfsdk/docs/source/usage/introduction.rst @@ -3,7 +3,7 @@ mcsv1_udaf Introduction mcsv1_udaf is a C++ API for writing User Defined Aggregate Functions (UDAF) and User Defined Analytic Functions (UDAnF) for the MariaDB Columstore engine. -In Columnstore 1.1.0, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. +In Columnstore 1.2, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. The API has a number of features. The general theme is, there is a class that represents the function, there is a context under which the function operates, and there is a data store for intermediate values. @@ -18,5 +18,5 @@ The steps required to create a function are: * :ref:`Compile udfsdk `. * :ref:`Copy the compiled libraries ` to the working directories. 
-In 1.1.0, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. +In 1.2, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. diff --git a/utils/udfsdk/docs/source/usage/sourcefile.rst b/utils/udfsdk/docs/source/usage/sourcefile.rst index b7ed38a32..5c43f29e4 100644 --- a/utils/udfsdk/docs/source/usage/sourcefile.rst +++ b/utils/udfsdk/docs/source/usage/sourcefile.rst @@ -34,21 +34,17 @@ Or, if using the :ref:`complexdatamodel`, type cast the UserData to your UserDat init() ------ -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. +:param colTypes: A list of the ColumnDatum used to access column types of the parameters. In init(), the columnData member is invalid. - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - see :ref:`ColDataTypes `. In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. + see :ref:`ColumnDatum`. In Columnstore 1.2, An arbitrary number of parameters is supported. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS -The init() method is where you sanity check the input, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. +The init() method is where you sanity check the input datatypes, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. init() is the exception to type casting the UserData member of context. 
UserData has not been created when init() is called, so you shouldn't use it here. @@ -60,13 +56,14 @@ If you're using :ref:`simpledatamodel`, you need to set the size of the structur .. rubric:: Check parameter count and type -Each function expects a certain number of columns to entered as parameters in the SQL query. For columnstore 1.1, the number of parameters is limited to one. +Each function expects a certain number of columns to be entered as parameters in the SQL query. It is possible to create a UDAF that accepts a variable number of parameters. You can discover which ones were actually used in init(), and modify your function's behavior accordingly. -colTypes is a vector of each parameter name and type. The name is the colum name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your functions behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +colTypes is an array of ColumnDatum structures from which the type and name can be gleaned. The name is the column name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your function's behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +The actual number of parameters passed can be obtained from context->getParameterCount(). :: - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -84,7 +81,7 @@ When you create your function using the SQL CREATE FUNCTION command, you must in ..
rubric:: Set width and scale -If you have secial requirements, especially if you might be dealing with decimal types:: +If you have special requirements, especially if you might be dealing with decimal types:: context->setColWidth(8); context->setScale(context->getScale()*2); @@ -117,13 +114,11 @@ This function may be called multiple times from both the UM and the PM. Make no nextValue() ----------- -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. +:param valsIn: an array representing the values to be added for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -208,7 +203,7 @@ For AVG, you might see:: dropValue --------- -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index fe1f3fd0e..3d3ac39ca 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -238,5 +238,38 @@ N="Makefile" Type="Makefile"/> + + + + + + + + + + + + + + + + + + + + + + + + + + + From 51df837b4ea89acaac48b87cdebbebb27e5d70eb Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:50:10 -0500 Subject: [PATCH 05/19] MCOL-1201 manual rebase with develop. 
Obsoletes branch MCOL-1201 --- dbcon/execplan/aggregatecolumn.cpp | 96 +-- dbcon/execplan/aggregatecolumn.h | 44 +- dbcon/joblist/expressionstep.cpp | 12 +- dbcon/joblist/expressionstep.h | 1 + dbcon/joblist/groupconcat.cpp | 2 +- dbcon/joblist/joblistfactory.cpp | 531 ++++++++---- dbcon/joblist/tupleaggregatestep.cpp | 280 +++++-- dbcon/mysql/ha_calpont_execplan.cpp | 858 +++++++++++--------- dbcon/mysql/ha_calpont_impl.cpp | 9 +- dbcon/mysql/ha_window_function.cpp | 37 +- utils/common/any.hpp | 270 +++--- utils/rowgroup/rowaggregation.cpp | 605 +++++++++----- utils/rowgroup/rowaggregation.h | 29 +- utils/udfsdk/CMakeLists.txt | 2 +- utils/udfsdk/allnull.cpp | 7 +- utils/udfsdk/allnull.h | 4 +- utils/udfsdk/avg_mode.cpp | 14 +- utils/udfsdk/avg_mode.h | 14 +- utils/udfsdk/mcsv1_udaf.cpp | 13 +- utils/udfsdk/mcsv1_udaf.h | 88 +- utils/udfsdk/median.cpp | 14 +- utils/udfsdk/median.h | 8 +- utils/udfsdk/ssq.cpp | 14 +- utils/udfsdk/ssq.h | 8 +- utils/udfsdk/udfmysql.cpp | 162 ++++ utils/udfsdk/udfsdk.vpj | 4 + utils/windowfunction/wf_udaf.cpp | 280 +++++-- utils/windowfunction/wf_udaf.h | 27 +- utils/windowfunction/windowfunctiontype.cpp | 8 +- writeengine/wrapper/writeengine.cpp | 10 +- 30 files changed, 2255 insertions(+), 1196 deletions(-) diff --git a/dbcon/execplan/aggregatecolumn.cpp b/dbcon/execplan/aggregatecolumn.cpp index 18cba2607..5bce12d79 100644 --- a/dbcon/execplan/aggregatecolumn.cpp +++ b/dbcon/execplan/aggregatecolumn.cpp @@ -98,36 +98,6 @@ AggregateColumn::AggregateColumn(const uint32_t sessionID): { } -AggregateColumn::AggregateColumn(const AggOp aggOp, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - -AggregateColumn::AggregateColumn(const AggOp aggOp, const string& content, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + content + ")") -{ - // 
TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); -} - -// deprecated constructor. use function name as string -AggregateColumn::AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fFunctionName(functionName), - fAggOp(NOOP), - fAsc(false), - fData(functionName + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - // deprecated constructor. use function name as string AggregateColumn::AggregateColumn(const string& functionName, const string& content, const uint32_t sessionID): ReturnedColumn(sessionID), @@ -137,20 +107,21 @@ AggregateColumn::AggregateColumn(const string& functionName, const string& conte fData(functionName + "(" + content + ")") { // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); + SRCP srcp(new ArithmeticColumn(content)); + fAggParms.push_back(srcp); } AggregateColumn::AggregateColumn( const AggregateColumn& rhs, const uint32_t sessionID ): ReturnedColumn(rhs, sessionID), fFunctionName (rhs.fFunctionName), fAggOp(rhs.fAggOp), - fFunctionParms(rhs.fFunctionParms), fTableAlias(rhs.tableAlias()), fAsc(rhs.asc()), fData(rhs.data()), fConstCol(rhs.fConstCol) { fAlias = rhs.alias(); + fAggParms = rhs.fAggParms; } /** @@ -166,10 +137,14 @@ const string AggregateColumn::toString() const if (fAlias.length() > 0) output << "/Alias: " << fAlias << endl; - if (fFunctionParms == 0) - output << "No arguments" << endl; + if (fAggParms.size() == 0) + output << "No arguments"; else - output << *fFunctionParms << endl; + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + output << *(fAggParms[i]) << " "; + } + output << endl; if (fConstCol) output << *fConstCol; @@ -191,10 +166,11 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const b << fFunctionName; b << static_cast(fAggOp); - if (fFunctionParms == 0) - b << (uint8_t) ObjectReader::NULL_CLASS; - else - fFunctionParms->serialize(b); + 
b << static_cast(fAggParms.size()); + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + fAggParms[i]->serialize(b); + } b << static_cast(fGroupByColList.size()); @@ -219,20 +195,26 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const void AggregateColumn::unserialize(messageqcpp::ByteStream& b) { - ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); - fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); - fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); - ReturnedColumn::unserialize(b); - b >> fFunctionName; - b >> fAggOp; - //delete fFunctionParms; - fFunctionParms.reset( - dynamic_cast(ObjectReader::createTreeNode(b))); - messageqcpp::ByteStream::quadbyte size; messageqcpp::ByteStream::quadbyte i; ReturnedColumn* rc; + ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); + fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); + fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); + fAggParms.erase(fAggParms.begin(), fAggParms.end()); + ReturnedColumn::unserialize(b); + b >> fFunctionName; + b >> fAggOp; + + b >> size; + for (i = 0; i < size; i++) + { + rc = dynamic_cast(ObjectReader::createTreeNode(b)); + SRCP srcp(rc); + fAggParms.push_back(srcp); + } + b >> size; for (i = 0; i < size; i++) @@ -261,6 +243,7 @@ void AggregateColumn::unserialize(messageqcpp::ByteStream& b) bool AggregateColumn::operator==(const AggregateColumn& t) const { const ReturnedColumn* rc1, *rc2; + AggParms::const_iterator it, it2; rc1 = static_cast(this); rc2 = static_cast(&t); @@ -277,16 +260,18 @@ bool AggregateColumn::operator==(const AggregateColumn& t) const if (fAggOp != t.fAggOp) return false; - if (fFunctionParms.get() != NULL && t.fFunctionParms.get() != NULL) + if (aggParms().size() != t.aggParms().size()) { - if (*fFunctionParms.get() != t.fFunctionParms.get()) + return false; + } + for (it = fAggParms.begin(), it2 = t.fAggParms.begin(); + it != fAggParms.end(); + ++it, 
++it2) + { + if (**it != **it2) return false; } - else if (fFunctionParms.get() != NULL || t.fFunctionParms.get() != NULL) - return false; - //if (fAlias != t.fAlias) - // return false; if (fTableAlias != t.fTableAlias) return false; @@ -645,3 +630,4 @@ AggregateColumn::AggOp AggregateColumn::agname2num(const string& agname) } } // namespace execplan + diff --git a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h index d1db7e5a4..b0884f179 100644 --- a/dbcon/execplan/aggregatecolumn.h +++ b/dbcon/execplan/aggregatecolumn.h @@ -40,6 +40,8 @@ class ByteStream; namespace execplan { +typedef std::vector AggParms; + /** * @brief A class to represent a aggregate return column * @@ -74,7 +76,8 @@ public: BIT_OR, BIT_XOR, GROUP_CONCAT, - UDAF + UDAF, + MULTI_PARM }; /** @@ -94,21 +97,6 @@ public: */ AggregateColumn(const uint32_t sessionID); - /** - * ctor - */ - AggregateColumn(const AggOp aggop, ReturnedColumn* parm, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const AggOp aggop, const std::string& content, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID = 0); - /** * ctor */ @@ -155,24 +143,27 @@ public: fAggOp = aggOp; } + /** get function parms - * - * set the function parms from this object */ - virtual const SRCP functionParms() const + virtual AggParms& aggParms() { - return fFunctionParms; + return fAggParms; + } + + virtual const AggParms& aggParms() const + { + return fAggParms; } /** set function parms - * - * set the function parms for this object */ - virtual void functionParms(const SRCP& functionParms) + virtual void aggParms(const AggParms& parms) { - fFunctionParms = functionParms; + fAggParms = parms; } + /** return a copy of this pointer * * deep copy of this pointer and return the copy @@ -325,9 +316,10 @@ protected: uint8_t fAggOp; /** - * A ReturnedColumn objects that are the arguments to this function + 
* ReturnedColumn objects that are the arguments to this + * function */ - SRCP fFunctionParms; + AggParms fAggParms; /** table alias * A string to represent table alias name which contains this column diff --git a/dbcon/joblist/expressionstep.cpp b/dbcon/joblist/expressionstep.cpp index 0e064c359..4a8a14ff3 100644 --- a/dbcon/joblist/expressionstep.cpp +++ b/dbcon/joblist/expressionstep.cpp @@ -56,6 +56,17 @@ using namespace rowgroup; namespace joblist { +ExpressionStep::ExpressionStep() : + fExpressionFilter(NULL), + fExpressionId(-1), + fVarBinOK(false), + fSelectFilter(false), + fAssociatedJoinId(0), + fDoJoin(false), + fVirtual(false) +{ +} + ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : JobStep(jobInfo), fExpressionFilter(NULL), @@ -68,7 +79,6 @@ ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : { } - ExpressionStep::ExpressionStep(const ExpressionStep& rhs) : JobStep(rhs), fExpression(rhs.expression()), diff --git a/dbcon/joblist/expressionstep.h b/dbcon/joblist/expressionstep.h index 4a069440f..63423fc7d 100644 --- a/dbcon/joblist/expressionstep.h +++ b/dbcon/joblist/expressionstep.h @@ -50,6 +50,7 @@ class ExpressionStep : public JobStep { public: // constructors + ExpressionStep(); ExpressionStep(const JobInfo&); // destructor constructors virtual ~ExpressionStep(); diff --git a/dbcon/joblist/groupconcat.cpp b/dbcon/joblist/groupconcat.cpp index 234fc0a8e..afc91a2ec 100644 --- a/dbcon/joblist/groupconcat.cpp +++ b/dbcon/joblist/groupconcat.cpp @@ -78,7 +78,7 @@ void GroupConcatInfo::prepGroupConcat(JobInfo& jobInfo) while (i != jobInfo.groupConcatCols.end()) { GroupConcatColumn* gcc = dynamic_cast(i->get()); - const RowColumn* rcp = dynamic_cast(gcc->functionParms().get()); + const RowColumn* rcp = dynamic_cast(gcc->aggParms()[0].get()); SP_GroupConcat groupConcat(new GroupConcat); groupConcat->fSeparator = gcc->separator(); diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index a48ecd13a..4cf7bccc5 100644 
--- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -18,7 +18,6 @@ // $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $ - #include #include #include @@ -870,7 +869,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo if (gcc != NULL) { - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rcp = dynamic_cast(srcp.get()); const vector& cols = rcp->columnVec(); @@ -891,21 +890,55 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } +#if 0 + // MCOL-1201 Add support for multi-parameter UDAnF + UDAFColumn* udafc = dynamic_cast(retCols[i].get()); + if (udafc != NULL) + { + srcp = udafc->aggParms()[0]; + const RowColumn* rcp = dynamic_cast(srcp.get()); + const vector& cols = rcp->columnVec(); + for (vector::const_iterator j = cols.begin(); j != cols.end(); j++) + { + srcp = *j; + if (dynamic_cast(srcp.get()) == NULL) + retCols.push_back(srcp); + + // Do we need this? + const ArithmeticColumn* ac = dynamic_cast(srcp.get()); + const FunctionColumn* fc = dynamic_cast(srcp.get()); + if (ac != NULL || fc != NULL) + { + // bug 3728, make a dummy expression step for each expression. + scoped_ptr es(new ExpressionStep(jobInfo)); + es->expression(srcp, jobInfo); + } + } + continue; + } +#endif srcp = retCols[i]; const AggregateColumn* ag = dynamic_cast(retCols[i].get()); - - if (ag != NULL) - srcp = ag->functionParms(); - - const ArithmeticColumn* ac = dynamic_cast(srcp.get()); - const FunctionColumn* fc = dynamic_cast(srcp.get()); - - if (ac != NULL || fc != NULL) + // bug 3728 Make a dummy expression for srcp if it is an + // expression. This is needed to fill in some stuff. + // Note that es.expression does nothing if the item is not an expression. + if (ag == NULL) { - // bug 3728, make a dummy expression step for each expression. - scoped_ptr es(new ExpressionStep(jobInfo)); - es->expression(srcp, jobInfo); + // Not an aggregate. 
Make a dummy expression for the item + ExpressionStep es; + es.expression(srcp, jobInfo); + } + else + { + // MCOL-1201 multi-argument aggregate. make a dummy expression + // step for each argument that is an expression. + for (uint32_t i = 0; i < ag->aggParms().size(); ++i) + { + srcp = ag->aggParms()[i]; + ExpressionStep es; + es.expression(srcp, jobInfo); + } } } @@ -915,17 +948,18 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo { srcp = retCols[i]; const SimpleColumn* sc = dynamic_cast(srcp.get()); + AggregateColumn* aggc = dynamic_cast(srcp.get()); bool doDistinct = (csep->distinct() && csep->groupByCols().empty()); uint32_t tupleKey = -1; string alias; string view; - // returned column could be groupby column, a simplecoulumn not a agregatecolumn + // returned column could be groupby column, a simplecoulumn not an aggregatecolumn int op = 0; CalpontSystemCatalog::OID dictOid = 0; CalpontSystemCatalog::ColType ct, aggCt; - if (sc == NULL) + if (aggc) { GroupConcatColumn* gcc = dynamic_cast(retCols[i].get()); @@ -939,7 +973,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo tupleKey = ti.key; jobInfo.returnedColVec.push_back(make_pair(tupleKey, gcc->aggOp())); // not a tokenOnly column. 
Mark all the columns involved - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rowCol = dynamic_cast(srcp.get()); if (rowCol) @@ -963,186 +997,353 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } - - AggregateColumn* ac = dynamic_cast(retCols[i].get()); - - if (ac != NULL) + else { - srcp = ac->functionParms(); - sc = dynamic_cast(srcp.get()); + // Aggregate column not group concat + AggParms& aggParms = aggc->aggParms(); - if (ac->constCol().get() != NULL) + for (uint32_t parm = 0; parm < aggParms.size(); ++parm) { - // replace the aggregate on constant with a count(*) - SRCP clone; - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) + if (aggc->constCol().get() != NULL) { - clone.reset(new UDAFColumn(*udafc, ac->sessionID())); + // replace the aggregate on constant with a count(*) + SRCP clone; + UDAFColumn* udafc = dynamic_cast(aggc); + + if (udafc) + { + clone.reset(new UDAFColumn(*udafc, aggc->sessionID())); + } + else + { + clone.reset(new AggregateColumn(*aggc, aggc->sessionID())); + } + + jobInfo.constAggregate.insert(make_pair(i, clone)); + aggc->aggOp(AggregateColumn::COUNT_ASTERISK); + aggc->distinct(false); + } + + srcp = aggParms[parm]; + sc = dynamic_cast(srcp.get()); + if (parm == 0) + { + op = aggc->aggOp(); } else { - clone.reset(new AggregateColumn(*ac, ac->sessionID())); + op = AggregateColumn::MULTI_PARM; + } + doDistinct = aggc->distinct(); + if (aggParms.size() == 1) + { + // Set the col type based on the single parm. + // Changing col type based on a parm if multiple parms + // doesn't really make sense. + updateAggregateColType(aggc, srcp, op, jobInfo); + } + aggCt = aggc->resultType(); + + // As of bug3695, make sure varbinary is not used in aggregation. 
+ // TODO: allow for UDAF + if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) + throw runtime_error ("VARBINARY in aggregate function is not supported."); + + // Project the parm columns or expressions + if (sc != NULL) + { + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. + if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. 
+ else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } + } + else + { + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - jobInfo.constAggregate.insert(make_pair(i, clone)); - ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ac->distinct(false); - } + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - op = ac->aggOp(); - doDistinct = ac->distinct(); - updateAggregateColType(ac, srcp, op, jobInfo); - aggCt = ac->resultType(); + if (keyIt == projectKeys.end()) + { + 
RetColsVector::iterator it = pcv.end(); - // As of bug3695, make sure varbinary is not used in aggregation. - if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) - throw runtime_error ("VARBINARY in aggregate function is not supported."); - } - } + if (doDistinct) + it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // simple column selected or aggregated - if (sc != NULL) - { - // one column only need project once - CalpontSystemCatalog::OID retOid = sc->oid(); - CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); - alias = extractTableAlias(sc); - view = sc->viewName(); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); - if (!sc->schemaName().empty()) - { - ct = sc->colType(); + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } -//XXX use this before connector sets colType in sc correctly. 
- if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) - ct = jobInfo.csc->colType(sc->oid()); + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; -//X - dictOid = isDictCol(ct); - } - else - { - retOid = (tblOid + 1) + sc->colPosition(); - ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; - } + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); - TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); - tupleKey = ti.key; + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; - // this is a string column - if (dictOid > 0) - { - map::iterator findit = jobInfo.tokenOnly.find(tupleKey); - - // if the column has never seen, and the op is count: possible need count only. - if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) - { - if (findit == jobInfo.tokenOnly.end()) - jobInfo.tokenOnly[tupleKey] = true; - } - // if aggregate other than count, token is not enough. 
- else if (op != 0 || doDistinct) - { - jobInfo.tokenOnly[tupleKey] = false; - } - - findit = jobInfo.tokenOnly.find(tupleKey); - - if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) - { - dictMap[tupleKey] = dictOid; - jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; - ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); - jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } } else { - const ArithmeticColumn* ac = NULL; - const FunctionColumn* fc = NULL; - const WindowFunctionColumn* wc = NULL; - bool hasAggCols = false; - - if ((ac = dynamic_cast(srcp.get())) != NULL) + // Not an Aggregate + // simple column selected + if (sc != NULL) { - if (ac->aggColumnList().size() > 0) - hasAggCols = true; + // one column only need project once + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. 
+ if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. + else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } } - else if ((fc = dynamic_cast(srcp.get())) != NULL) - { - if (fc->aggColumnList().size() > 0) - hasAggCols = true; - } - else if (dynamic_cast(srcp.get()) != NULL) - { - std::ostringstream errmsg; - errmsg << "Invalid aggregate function nesting."; - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - else if ((wc = dynamic_cast(srcp.get())) == NULL) - { - std::ostringstream errmsg; - errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - - uint64_t eid = srcp.get()->expressionId(); - ct = srcp.get()->resultType(); - TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); - tupleKey = ti.key; - - if (hasAggCols) - jobInfo.expressionVec.push_back(tupleKey); - } - - // add to project list - vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - - if (keyIt == projectKeys.end()) - { - RetColsVector::iterator it = pcv.end(); - - if (doDistinct) - it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); else - it = pcv.insert(pcv.end(), srcp); - - projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); - } - else if (doDistinct) // @bug4250, move forward distinct column if necessary. 
- { - uint32_t pos = distance(projectKeys.begin(), keyIt); - - if (pos >= lastGroupByPos) { - pcv[pos] = pcv[lastGroupByPos]; - pcv[lastGroupByPos] = srcp; - projectKeys[pos] = projectKeys[lastGroupByPos]; - projectKeys[lastGroupByPos] = tupleKey; - lastGroupByPos++; + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - } - if (doDistinct && dictOid > 0) - tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - // remember the columns to be returned - jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + if (keyIt == projectKeys.end()) + { + RetColsVector::iterator it = pcv.end(); - if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) - jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + if (doDistinct) + it = pcv.insert(pcv.begin() + 
lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // bug 1499 distinct processing, save unique distinct columns - if (doDistinct && - (jobInfo.distinctColVec.end() == - find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) - { - jobInfo.distinctColVec.push_back(tupleKey); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); + + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } + + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 9e23ac17b..ff490da5b 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -164,6 +164,9 @@ inline RowAggFunctionType functionIdMap(int planFuncId) case AggregateColumn::UDAF: return ROWAGG_UDAF; + case AggregateColumn::MULTI_PARM: + return ROWAGG_MULTI_PARM; + default: return ROWAGG_FUNCT_UNDEFINE; } @@ -1302,7 +1305,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no/not enough 
UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -1468,7 +1471,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); @@ -1483,6 +1486,17 @@ void TupleAggregateStep::prep1PhaseAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(key); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(width[colProj]); + } + break; + default: { ostringstream emsg; @@ -1560,7 +1574,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(3)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec[i]->fAuxColumnIndex = lastCol++; @@ -1675,7 +1689,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation map projColPosMap; @@ -1848,7 +1862,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); 
} } else @@ -2043,7 +2057,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -2065,6 +2079,18 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(aggKey); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(widthProj[colProj]); + ++colAgg; + } + break; + default: { ostringstream emsg; @@ -2111,7 +2137,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( groupByNoDist.push_back(groupby); aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - + + projColsUDAFIndex = 0; // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2121,6 +2148,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colAgg = -1; + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -2432,11 +2467,37 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + // update the aggregate function vector + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = 
jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + } if (aggOp == ROWAGG_COUNT_NO_OP) funct->fAuxColumnIndex = colAgg; @@ -2549,7 +2610,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep1PhaseDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec2[i]->fAuxColumnIndex = lastCol++; @@ -2893,7 +2954,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3036,12 +3097,11 @@ void TupleAggregateStep::prep2PhasesAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); break; } - } if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -3240,7 +3300,7 @@ void 
TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } oidsAggPm.push_back(oidsProj[colProj]); @@ -3261,6 +3321,18 @@ void TupleAggregateStep::prep2PhasesAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -3278,11 +3350,16 @@ void TupleAggregateStep::prep2PhasesAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap; AGG_MAP aggDupFuncMap; + projColsUDAFIndex = 0; // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3299,7 +3376,14 @@ void TupleAggregateStep::prep2PhasesAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colPm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } // Is this a UDAF? use the function as part of the key. 
+ mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; if (aggOp == ROWAGG_UDAF) @@ -3452,20 +3536,36 @@ void TupleAggregateStep::prep2PhasesAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3517,7 +3617,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3545,7 +3645,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -3691,6 +3791,7 
@@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector groupByPm, groupByUm, groupByNoDist; vector functionVecPm, functionNoDistVec, functionVecUm; + list multiParmIndexes; uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; @@ -3702,7 +3803,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3856,7 +3957,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -4050,7 +4151,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -4072,6 +4173,19 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + multiParmIndexes.push_back(colAggPm); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -4093,12 +4207,23 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( groupByUm.push_back(groupby); } + // Keep a count of the parms 
after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) + { SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; // UDAF support + if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) + { + // Multi-Parm is not used on the UM + ++multiParms; + continue; + } if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); @@ -4106,7 +4231,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } else @@ -4116,18 +4241,25 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fStatsFunction, funcPm->fOutputColumnIndex, funcPm->fOutputColumnIndex, - funcPm->fAuxColumnIndex)); + funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } } - posAggUm = posAggPm; - oidsAggUm = oidsAggPm; - keysAggUm = keysAggPm; - scaleAggUm = scaleAggPm; - precisionAggUm = precisionAggPm; - widthAggUm = widthAggPm; - typeAggUm = typeAggPm; + // Copy over the PM arrays to the UM. Skip any that are a multi-parm entry. 
+ for (uint32_t idx = 0; idx < oidsAggPm.size(); ++idx) + { + if (find (multiParmIndexes.begin(), multiParmIndexes.end(), idx ) != multiParmIndexes.end()) + { + continue; + } + oidsAggUm.push_back(oidsAggPm[idx]); + keysAggUm.push_back(keysAggPm[idx]); + scaleAggUm.push_back(scaleAggPm[idx]); + precisionAggUm.push_back(precisionAggPm[idx]); + widthAggUm.push_back(widthAggPm[idx]); + typeAggUm.push_back(typeAggPm[idx]); + } } @@ -4137,6 +4269,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4159,6 +4295,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colUm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } + + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -4285,7 +4436,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second; + colUm = it->second - multiParms; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); @@ -4309,7 +4460,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - 
colUm = it->second; + colUm = it->second - multiParms; if (aggOp == ROWAGG_SUM) { @@ -4412,21 +4563,36 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - pUDAFFunc = udafc->getContext().getFunction(); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4480,7 +4646,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4540,7 +4706,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called 
but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -4687,6 +4853,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -4694,6 +4865,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } if (returnedColVec[k].first != distinctColKey) continue; @@ -4715,7 +4891,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -4732,9 +4908,15 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } // search non-distinct functions in functionVec vector::iterator it = functionVecUm.begin(); @@ -4752,7 +4934,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK || @@ -4773,7 +4955,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( 
f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index fac0cd032..5c1989d51 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4038,6 +4038,10 @@ ParseTree* buildParseTree(Item_func* item, gp_walk_info& gwi, bool& nonSupport) ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { + // MCOL-1201 For UDAnF multiple parameters + vector selCols; + vector orderCols; + if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -4054,6 +4058,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // N.B. argument_count() is the # of formal parms to the agg fcn. InifniDB only supports 1 argument // TODO: Support more than one parm +#if 0 if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC && isp->sum_func() != Item_sum::UDF_SUM_FUNC) { @@ -4061,7 +4066,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG); return NULL; } - +#endif AggregateColumn* ac = NULL; if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) @@ -4084,444 +4089,509 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { gwi.fatalParseError = true; gwi.parseErrorText = "Non supported aggregate type on the select clause"; + if (ac) + delete ac; return NULL; } - // special parsing for group_concat - if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + try { - Item_func_group_concat* gc = (Item_func_group_concat*)isp; + + // special parsing for group_concat + if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + Item_func_group_concat* gc = (Item_func_group_concat*)isp; vector orderCols; - RowColumn* rowCol = new RowColumn(); + 
RowColumn* rowCol = new RowColumn(); vector selCols; - uint32_t select_ctn = gc->count_field(); - ReturnedColumn* rc = NULL; + uint32_t select_ctn = gc->count_field(); + ReturnedColumn* rc = NULL; - for (uint32_t i = 0; i < select_ctn; i++) - { - rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); - - if (!rc || gwi.fatalParseError) - return NULL; - - selCols.push_back(SRCP(rc)); - } - - ORDER** order_item, **end; - - for (order_item = gc->get_order(), - end = order_item + gc->order_field(); order_item < end; - order_item++) - { - Item* ord_col = *(*order_item)->item; - - if (ord_col->type() == Item::INT_ITEM) + for (uint32_t i = 0; i < select_ctn; i++) { - Item_int* id = (Item_int*)ord_col; - - if (id->val_int() > (int)selCols.size()) - { - gwi.fatalParseError = true; - return NULL; - } - - rc = selCols[id->val_int() - 1]->clone(); - rc->orderPos(id->val_int() - 1); - } - else - { - rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); + rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); if (!rc || gwi.fatalParseError) { + if (ac) + delete ac; return NULL; } + + selCols.push_back(SRCP(rc)); } - // 10.2 TODO: direction is now a tri-state flag - rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? 
true : false); - orderCols.push_back(SRCP(rc)); - } + ORDER** order_item, **end; - rowCol->columnVec(selCols); - (dynamic_cast(ac))->orderCols(orderCols); - parm.reset(rowCol); - - if (gc->str_separator()) - { - string separator; - separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); - (dynamic_cast(ac))->separator(separator); - } - } - else - { - for (uint32_t i = 0; i < isp->argument_count(); i++) - { - Item* sfitemp = sfitempp[i]; - Item::Type sfitype = sfitemp->type(); - - switch (sfitype) + for (order_item = gc->get_order(), + end = order_item + gc->order_field(); order_item < end; + order_item++) { - case Item::FIELD_ITEM: - { - Item_field* ifp = reinterpret_cast(sfitemp); - SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + Item* ord_col = *(*order_item)->item; - if (!sc) + if (ord_col->type() == Item::INT_ITEM) + { + Item_int* id = (Item_int*)ord_col; + + if (id->val_int() > (int)selCols.size()) { gwi.fatalParseError = true; - break; + if (ac) + delete ac; + return NULL; } - parm.reset(sc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); - TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); - gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); - break; + rc = selCols[id->val_int() - 1]->clone(); + rc->orderPos(id->val_int() - 1); } - - case Item::INT_ITEM: - case Item::STRING_ITEM: - case Item::REAL_ITEM: - case Item::DECIMAL_ITEM: + else { - // treat as count(*) - if (ac->aggOp() == AggregateColumn::COUNT) - ac->aggOp(AggregateColumn::COUNT_ASTERISK); + rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::NULL_ITEM: - { - //ac->aggOp(AggregateColumn::COUNT); - parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); - //ac->functionParms(parm); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::FUNC_ITEM: - { - Item_func* ifp = (Item_func*)sfitemp; - ReturnedColumn* rc = 0; - - // check count(1+1) case - vector tmpVec; - uint16_t parseInfo = 0; - parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); - - if (parseInfo & SUB_BIT) + if (!rc || gwi.fatalParseError) { - gwi.fatalParseError = true; - break; - } - else if (!gwi.fatalParseError && - !(parseInfo & AGG_BIT) && - !(parseInfo & AF_BIT) && - tmpVec.size() == 0) - { - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - FunctionColumn* fc = dynamic_cast(rc); - - if ((fc && fc->functionParms().empty()) || !fc) - { - //ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (dynamic_cast(rc)) - { - //@bug5229. handle constant function on aggregate argument - ac->constCol(SRCP(rc)); - break; - } - } - } - - // MySQL carelessly allows correlated aggregate function on the WHERE clause. - // Here is the work around to deal with that inconsistence. 
- // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; - ClauseType clauseType = gwi.clauseType; - - if (gwi.clauseType == WHERE) - gwi.clauseType = HAVING; - - // @bug 3603. for cases like max(rand()). try to build function first. - if (!rc) - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - - parm.reset(rc); - gwi.clauseType = clauseType; - - if (gwi.fatalParseError) - break; - - //ac->functionParms(parm); - break; - } - - case Item::REF_ITEM: - { - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (rc) - { - parm.reset(rc); - //ac->functionParms(parm); - break; + if (ac) + delete ac; + return NULL; } } - default: - { - gwi.fatalParseError = true; - //gwi.parseErrorText = "Non-supported Item in Aggregate function"; - } + // 10.2 TODO: direction is now a tri-state flag + rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); + orderCols.push_back(SRCP(rc)); } - if (gwi.fatalParseError) + rowCol->columnVec(selCols); + (dynamic_cast(ac))->orderCols(orderCols); + parm.reset(rowCol); + + if (gc->str_separator()) { - if (gwi.parseErrorText.empty()) - { - Message::Args args; - - if (item->name) - args.add(item->name); - else - args.add(""); - - gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); - } - - return NULL; + string separator; + separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); + (dynamic_cast(ac))->separator(separator); } } - } - - if (parm) - { - ac->functionParms(parm); - - if (isp->sum_func() == Item_sum::AVG_FUNC || - isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: 
- case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::DECIMAL; - ct.colWidth = 8; - ct.scale += 4; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; -#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::COUNT_FUNC || - isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = parm->resultType().scale; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_FUNC || - isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - ct.colDataType = CalpontSystemCatalog::BIGINT; - - // no break, let fall through - - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - ct.colWidth = 8; - break; - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::UBIGINT; - ct.colWidth = 8; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; 
-#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::STD_FUNC || - isp->sum_func() == Item_sum::VARIANCE_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - ct.scale = 0; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = 0; - ct.precision = -16; // borrowed to indicate skip null value check on connector - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - //Item_func_group_concat* gc = (Item_func_group_concat*)isp; - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::VARCHAR; - ct.colWidth = isp->max_length; - ct.precision = 0; - ac->resultType(ct); - } else { - ac->resultType(parm->resultType()); + for (uint32_t i = 0; i < isp->argument_count(); i++) + { + Item* sfitemp = sfitempp[i]; + Item::Type sfitype = sfitemp->type(); + + switch (sfitype) + { + case Item::FIELD_ITEM: + { + Item_field* ifp = reinterpret_cast(sfitemp); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + + if (!sc) + { + gwi.fatalParseError = true; + break; + } + + parm.reset(sc); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); + TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); + gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); + break; + } + + case Item::INT_ITEM: + case Item::STRING_ITEM: + case Item::REAL_ITEM: + case Item::DECIMAL_ITEM: + { + // treat as count(*) + if (ac->aggOp() == AggregateColumn::COUNT) + ac->aggOp(AggregateColumn::COUNT_ASTERISK); + + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::NULL_ITEM: + { + parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::FUNC_ITEM: + { + Item_func* ifp = (Item_func*)sfitemp; + ReturnedColumn* rc = 0; + + // check count(1+1) case + vector tmpVec; + uint16_t parseInfo = 0; + parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); + + if (parseInfo & SUB_BIT) + { + gwi.fatalParseError = true; + break; + } + else if (!gwi.fatalParseError && + !(parseInfo & AGG_BIT) && + !(parseInfo & AF_BIT) && + tmpVec.size() == 0) + { + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + FunctionColumn* fc = dynamic_cast(rc); + + if ((fc && fc->functionParms().empty()) || !fc) + { + //ac->aggOp(AggregateColumn::COUNT_ASTERISK); + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (dynamic_cast(rc)) + { + //@bug5229. handle constant function on aggregate argument + ac->constCol(SRCP(rc)); + break; + } + } + } + + // MySQL carelessly allows correlated aggregate function on the WHERE clause. + // Here is the work around to deal with that inconsistence. + // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; + ClauseType clauseType = gwi.clauseType; + + if (gwi.clauseType == WHERE) + gwi.clauseType = HAVING; + + // @bug 3603. for cases like max(rand()). try to build function first. 
+ if (!rc) + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + + parm.reset(rc); + gwi.clauseType = clauseType; + + if (gwi.fatalParseError) + break; + + break; + } + + case Item::REF_ITEM: + { + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (rc) + { + parm.reset(rc); + break; + } + } + + default: + { + gwi.fatalParseError = true; + //gwi.parseErrorText = "Non-supported Item in Aggregate function"; + } + } + + if (gwi.fatalParseError) + { + if (gwi.parseErrorText.empty()) + { + Message::Args args; + + if (item->name) + args.add(item->name); + else + args.add(""); + + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); + } + + if (ac) + delete ac; + return NULL; + } + if (parm) + { + // MCOL-1201 multi-argument aggregate + ac->aggParms().push_back(parm); + } + } } - } - else - { - ac->resultType(colType_MysqlToIDB(isp)); - } - // adjust decimal result type according to internalDecimalScale - if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) - { - CalpontSystemCatalog::ColType ct = ac->resultType(); - ct.scale = gwi.internalDecimalScale; - ac->resultType(ct); - } - - // check for same aggregate on the select list - ac->expressionId(ci->expressionId++); - - if (gwi.clauseType != SELECT) - { - for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + // Get result type + // Modified for MCOL-1201 multi-argument aggregate + if (ac->aggParms().size() > 0) { - if (*ac == gwi.returnedCols[i].get()) - ac->expressionId(gwi.returnedCols[i]->expressionId()); - } - } + // These are all one parm functions, so we can safely + // use the first parm for result type. + parm = ac->aggParms()[0]; + if (isp->sum_func() == Item_sum::AVG_FUNC || + isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); - // @bug5977 @note Temporary fix to avoid mysqld crash. 
The permanent fix will - // be applied in ExeMgr. When the ExeMgr fix is available, this checking - // will be taken out. - if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::DECIMAL; + ct.colWidth = 8; + ct.scale += 4; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::COUNT_FUNC || + isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = parm->resultType().scale; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_FUNC || + isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + ct.colDataType = CalpontSystemCatalog::BIGINT; + + // no break, let fall through + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + ct.colWidth = 8; + break; + + case CalpontSystemCatalog::UTINYINT: + case 
CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::UBIGINT; + ct.colWidth = 8; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::STD_FUNC || + isp->sum_func() == Item_sum::VARIANCE_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + ct.scale = 0; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = 0; + ct.precision = -16; // borrowed to indicate skip null value check on connector + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + //Item_func_group_concat* gc = (Item_func_group_concat*)isp; + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::VARCHAR; + ct.colWidth = isp->max_length; + ct.precision = 0; + ac->resultType(ct); + } + else + { + // UDAF result type will be set below. 
+ ac->resultType(parm->resultType()); + } + } + else + { + ac->resultType(colType_MysqlToIDB(isp)); + } + + // adjust decimal result type according to internalDecimalScale + if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) + { + CalpontSystemCatalog::ColType ct = ac->resultType(); + ct.scale = gwi.internalDecimalScale; + ac->resultType(ct); + } + + // check for same aggregate on the select list + ac->expressionId(ci->expressionId++); + + if (gwi.clauseType != SELECT) + { + for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + { + if (*ac == gwi.returnedCols[i].get()) + ac->expressionId(gwi.returnedCols[i]->expressionId()); + } + } + + // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will + // be applied in ExeMgr. When the ExeMgr fix is available, this checking + // will be taken out. + if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "No project column found for aggregate function"; + if (ac) + delete ac; + return NULL; + } + else if (ac->constCol()) + { + gwi.count_asterisk_list.push_back(ac); + } + + // For UDAF, populate the context and call the UDAF init() function. + // The return type is (should be) set in context by init(). + if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + { + UDAFColumn* udafc = dynamic_cast(ac); + + if (udafc) + { + mcsv1Context& context = udafc->getContext(); + context.setName(isp->func_name()); + + // Set up the return type defaults for the call to init() + context.setResultType(udafc->resultType().colDataType); + context.setColWidth(udafc->resultType().colWidth); + context.setScale(udafc->resultType().scale); + context.setPrecision(udafc->resultType().precision); + + context.setParamCount(udafc->aggParms().size()); + ColumnDatum colType; + ColumnDatum colTypes[udafc->aggParms().size()]; + // Build the column type vector. 
+ // Modified for MCOL-1201 multi-argument aggregate + for (uint32_t i = 0; i < udafc->aggParms().size(); ++i) + { + const execplan::CalpontSystemCatalog::ColType& resultType + = udafc->aggParms()[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; + } + + // Call the user supplied init() + mcsv1sdk::mcsv1_UDAF* udaf = context.getFunction(); + if (!udaf) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "Aggregate Function " + context.getName() + " doesn't exist in the ColumnStore engine"; + if (ac) + delete ac; + return NULL; + } + if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR) + { + gwi.fatalParseError = true; + gwi.parseErrorText = udafc->getContext().getErrorMessage(); + if (ac) + delete ac; + return NULL; + } + + // UDAF_OVER_REQUIRED means that this function is for Window + // Function only. Reject it here in aggregate land. + if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, + context.getName()); + if (ac) + delete ac; + return NULL; + } + + // Set the return type as set in init() + CalpontSystemCatalog::ColType ct; + ct.colDataType = context.getResultType(); + ct.colWidth = context.getColWidth(); + ct.scale = context.getScale(); + ct.precision = context.getPrecision(); + udafc->resultType(ct); + } + } + + } + catch (std::logic_error e) { gwi.fatalParseError = true; - gwi.parseErrorText = "No project column found for aggregate function"; + gwi.parseErrorText = "error building Aggregate Function: "; + gwi.parseErrorText += e.what(); + if (ac) + delete ac; return NULL; } - else if (ac->constCol()) + catch (...) 
{ - gwi.count_asterisk_list.push_back(ac); + gwi.fatalParseError = true; + gwi.parseErrorText = "error building Aggregate Function: Unspecified exception"; + if (ac) + delete ac; + return NULL; } - - // For UDAF, populate the context and call the UDAF init() function. - if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) - { - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) - { - mcsv1Context& context = udafc->getContext(); - context.setName(isp->func_name()); - - // Set up the return type defaults for the call to init() - context.setResultType(udafc->resultType().colDataType); - context.setColWidth(udafc->resultType().colWidth); - context.setScale(udafc->resultType().scale); - context.setPrecision(udafc->resultType().precision); - - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - - // Build the column type vector. For now, there is only one - colTypes.push_back(make_pair(udafc->functionParms()->alias(), udafc->functionParms()->resultType().colDataType)); - - // Call the user supplied init() - if (context.getFunction()->init(&context, colTypes) == mcsv1_UDAF::ERROR) - { - gwi.fatalParseError = true; - gwi.parseErrorText = udafc->getContext().getErrorMessage(); - return NULL; - } - - if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) - { - gwi.fatalParseError = true; - gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, - context.getName()); - return NULL; - } - - // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; - ct.colDataType = context.getResultType(); - ct.colWidth = context.getColWidth(); - ct.scale = context.getScale(); - ct.precision = context.getPrecision(); - udafc->resultType(ct); - } - } - return ac; } @@ -7843,7 +7913,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } 
std::vector::iterator funciter; @@ -9923,7 +9993,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } std::vector::iterator funciter; diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 5ca94562b..7cf476f3d 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -781,8 +781,11 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h //double double_val = *(double*)(&value); //f2->store(double_val); - if (f2->decimals() < (uint32_t)row.getScale(s)) - f2->dec = (uint32_t)row.getScale(s); + if ((f2->decimals() == DECIMAL_NOT_SPECIFIED && row.getScale(s) > 0) + || f2->decimals() < row.getScale(s)) + { + f2->dec = row.getScale(s); + } f2->store(dl); @@ -5275,8 +5278,6 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE execplan::CalpontSelectExecutionPlan::ColumnMap::iterator colMapIter; execplan::CalpontSelectExecutionPlan::ColumnMap::iterator condColMapIter; execplan::ParseTree* ptIt; - execplan::ReturnedColumn* rcIt; - for (TABLE_LIST* tl = gi.groupByTables; tl; tl = tl->next_local) { mapiter = ci->tableMap.find(tl->table); diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 4b648cb15..8d68a6260 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -340,6 +340,7 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n ac->distinct(item_sum->has_with_distinct()); Window_spec* win_spec = wf->window_spec; SRCP srcp; + CalpontSystemCatalog::ColType ct; // For return type // arguments vector funcParms; @@ -370,18 +371,25 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n context.setColWidth(rt.colWidth); context.setScale(rt.scale); context.setPrecision(rt.precision); + 
context.setParamCount(funcParms.size()); + + mcsv1sdk::ColumnDatum colType; + mcsv1sdk::ColumnDatum colTypes[funcParms.size()]; // Turn on the Analytic flag so the function is aware it is being called // as a Window Function. context.setContextFlag(CONTEXT_IS_ANALYTIC); - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - // Build the column type vector. + // Modified for MCOL-1201 multi-argument aggregate for (size_t i = 0; i < funcParms.size(); ++i) { - colTypes.push_back(make_pair(funcParms[i]->alias(), funcParms[i]->resultType().colDataType)); + const execplan::CalpontSystemCatalog::ColType& resultType + = funcParms[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; } // Call the user supplied init() @@ -401,7 +409,6 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n } // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; ct.colDataType = context.getResultType(); ct.colWidth = context.getColWidth(); ct.scale = context.getScale(); @@ -419,10 +426,10 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n { case Item_sum::UDF_SUM_FUNC: { - uint64_t bIgnoreNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)); - char sIgnoreNulls[18]; - sprintf(sIgnoreNulls, "%lu", bIgnoreNulls); - srcp.reset(new ConstantColumn(sIgnoreNulls, (uint64_t)bIgnoreNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 1 => RESPECT + uint64_t bRespectNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) ? 0 : 1; + char sRespectNulls[18]; + sprintf(sRespectNulls, "%lu", bRespectNulls); + srcp.reset(new ConstantColumn(sRespectNulls, (uint64_t)bRespectNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 
1 => RESPECT funcParms.push_back(srcp); break; } @@ -881,11 +888,13 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n return NULL; } - ac->resultType(colType_MysqlToIDB(item_sum)); - - // bug5736. Make the result type double for some window functions when - // infinidb_double_for_decimal_math is set. - ac->adjustResultType(); + if (item_sum->sum_func() != Item_sum::UDF_SUM_FUNC) + { + ac->resultType(colType_MysqlToIDB(item_sum)); + // bug5736. Make the result type double for some window functions when + // infinidb_double_for_decimal_math is set. + ac->adjustResultType(); + } ac->expressionId(ci->expressionId++); diff --git a/utils/common/any.hpp b/utils/common/any.hpp index be0ca679b..5408c5c87 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -9,123 +9,142 @@ * http://www.boost.org/LICENSE_1_0.txt */ +#include #include namespace static_any { namespace anyimpl { + struct bad_any_cast + { + }; - struct bad_any_cast - { - }; + struct empty_any + { + }; - struct empty_any - { - }; + struct base_any_policy + { + virtual void static_delete(void** x) = 0; + virtual void copy_from_value(void const* src, void** dest) = 0; + virtual void clone(void* const* src, void** dest) = 0; + virtual void move(void* const* src, void** dest) = 0; + virtual void* get_value(void** src) = 0; + virtual size_t get_size() = 0; + }; - struct base_any_policy - { - virtual void static_delete(void** x) = 0; - virtual void copy_from_value(void const* src, void** dest) = 0; - virtual void clone(void* const* src, void** dest) = 0; - virtual void move(void* const* src, void** dest) = 0; - virtual void* get_value(void** src) = 0; - virtual size_t get_size() = 0; - }; + template + struct typed_base_any_policy : base_any_policy + { + virtual size_t get_size() + { + return sizeof(T); + } + }; - template - struct typed_base_any_policy : base_any_policy - { - virtual size_t get_size() { return sizeof(T); } - }; + template + struct small_any_policy : 
typed_base_any_policy + { + virtual void static_delete(void** x) + { + } + virtual void copy_from_value(void const* src, void** dest) + { + new(dest) T(*reinterpret_cast(src)); + } + virtual void clone(void* const* src, void** dest) + { + *dest = *src; + } + virtual void move(void* const* src, void** dest) + { + *dest = *src; + } + virtual void* get_value(void** src) + { + return reinterpret_cast(src); + } + }; - template - struct small_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) { } - virtual void copy_from_value(void const* src, void** dest) - { new(dest) T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) { *dest = *src; } - virtual void move(void* const* src, void** dest) { *dest = *src; } - virtual void* get_value(void** src) { return reinterpret_cast(src); } - }; - - template - struct big_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) + template + struct big_any_policy : typed_base_any_policy + { + virtual void static_delete(void** x) { if (*x) - delete(*reinterpret_cast(x)); + delete(*reinterpret_cast(x)); *x = NULL; } - virtual void copy_from_value(void const* src, void** dest) + virtual void copy_from_value(void const* src, void** dest) { - *dest = new T(*reinterpret_cast(src)); + *dest = new T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) + virtual void clone(void* const* src, void** dest) { - *dest = new T(**reinterpret_cast(src)); + *dest = new T(**reinterpret_cast(src)); } - virtual void move(void* const* src, void** dest) + virtual void move(void* const* src, void** dest) { - (*reinterpret_cast(dest))->~T(); - **reinterpret_cast(dest) = **reinterpret_cast(src); + (*reinterpret_cast(dest))->~T(); + **reinterpret_cast(dest) = **reinterpret_cast(src); } - virtual void* get_value(void** src) { return *src; } - }; + virtual void* get_value(void** src) + { + return *src; + } + }; - template - struct choose_policy - { - typedef 
big_any_policy type; - }; + template + struct choose_policy + { + typedef big_any_policy type; + }; - template - struct choose_policy - { - typedef small_any_policy type; - }; + template + struct choose_policy + { + typedef small_any_policy type; + }; - struct any; + struct any; - /// Choosing the policy for an any type is illegal, but should never happen. - /// This is designed to throw a compiler error. - template<> - struct choose_policy - { - typedef void type; - }; + /// Choosing the policy for an any type is illegal, but should never happen. + /// This is designed to throw a compiler error. + template<> + struct choose_policy + { + typedef void type; + }; - /// Specializations for small types. - #define SMALL_POLICY(TYPE) template<> struct \ - choose_policy { typedef small_any_policy type; }; + /// Specializations for small types. +#define SMALL_POLICY(TYPE) template<> struct \ + choose_policy { typedef small_any_policy type; }; - SMALL_POLICY(char); - SMALL_POLICY(signed char); - SMALL_POLICY(unsigned char); - SMALL_POLICY(signed short); - SMALL_POLICY(unsigned short); - SMALL_POLICY(signed int); - SMALL_POLICY(unsigned int); - SMALL_POLICY(signed long); - SMALL_POLICY(unsigned long); - SMALL_POLICY(signed long long); - SMALL_POLICY(unsigned long long); - SMALL_POLICY(float); - SMALL_POLICY(double); - SMALL_POLICY(bool); + SMALL_POLICY(char); + SMALL_POLICY(signed char); + SMALL_POLICY(unsigned char); + SMALL_POLICY(signed short); + SMALL_POLICY(unsigned short); + SMALL_POLICY(signed int); + SMALL_POLICY(unsigned int); + SMALL_POLICY(signed long); + SMALL_POLICY(unsigned long); + SMALL_POLICY(signed long long); + SMALL_POLICY(unsigned long long); + SMALL_POLICY(float); + SMALL_POLICY(double); + SMALL_POLICY(bool); - #undef SMALL_POLICY +#undef SMALL_POLICY - /// This function will return a different policy for each type. 
- template - base_any_policy* get_policy() - { - static typename choose_policy::type policy; - return &policy; - }; + /// This function will return a different policy for each type. + template + base_any_policy* get_policy() + { + static typename choose_policy::type policy; + return &policy; + }; } class any @@ -139,37 +158,40 @@ public: /// Initializing constructor. template any(const T& x) - : policy(anyimpl::get_policy()), object(NULL) + : policy(anyimpl::get_policy()), object(NULL) { assign(x); } /// Empty constructor. any() - : policy(anyimpl::get_policy()), object(NULL) - { } + : policy(anyimpl::get_policy()), object(NULL) + { + } /// Special initializing constructor for string literals. any(const char* x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Copy constructor. any(const any& x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Destructor. - ~any() { + ~any() + { policy->static_delete(&object); } /// Assignment function from another any. - any& assign(const any& x) { + any& assign(const any& x) + { reset(); policy = x.policy; policy->clone(&x.object, &object); @@ -178,7 +200,8 @@ public: /// Assignment function. template - any& assign(const T& x) { + any& assign(const T& x) + { reset(); policy = anyimpl::get_policy(); policy->copy_from_value(&x, &object); @@ -197,8 +220,42 @@ public: return assign(x); } + /// Less than operator for sorting + bool operator<(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) < 0 ? 
1 : 0; + } + return 0; + } + + /// equal operator + bool operator==(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) == 0 ? 1 : 0; + } + return 0; + } + /// Utility functions - any& swap(any& x) { + uint8_t getHash() const + { + void* p1 = const_cast(object); + return *(uint64_t*)policy->get_value(&p1) % 4048; + } + any& swap(any& x) + { std::swap(policy, x.policy); std::swap(object, x.object); return *this; @@ -206,27 +263,32 @@ public: /// Cast operator. You can only cast to the original type. template - T& cast() { - if (policy != anyimpl::get_policy()) + T& cast() + { + if (policy != anyimpl::get_policy()) throw anyimpl::bad_any_cast(); T* r = reinterpret_cast(policy->get_value(&object)); return *r; } /// Returns true if the any contains no value. - bool empty() const { + bool empty() const + { return policy == anyimpl::get_policy(); } /// Frees any allocated memory, and sets the value to NULL. - void reset() { + void reset() + { policy->static_delete(&object); policy = anyimpl::get_policy(); } /// Returns true if the two types are the same. 
- bool compatible(const any& x) const { + bool compatible(const any& x) const + { return policy == x.policy; } }; + } diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 8d110cfc8..c1f5bbd63 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -215,6 +215,22 @@ inline string getStringNullValue() namespace rowgroup { +const std::string typeStr(""); +const static_any::any& RowAggregation::charTypeId((char)1); +const static_any::any& RowAggregation::scharTypeId((signed char)1); +const static_any::any& RowAggregation::shortTypeId((short)1); +const static_any::any& RowAggregation::intTypeId((int)1); +const static_any::any& RowAggregation::longTypeId((long)1); +const static_any::any& RowAggregation::llTypeId((long long)1); +const static_any::any& RowAggregation::ucharTypeId((unsigned char)1); +const static_any::any& RowAggregation::ushortTypeId((unsigned short)1); +const static_any::any& RowAggregation::uintTypeId((unsigned int)1); +const static_any::any& RowAggregation::ulongTypeId((unsigned long)1); +const static_any::any& RowAggregation::ullTypeId((unsigned long long)1); +const static_any::any& RowAggregation::floatTypeId((float)1); +const static_any::any& RowAggregation::doubleTypeId((double)1); +const static_any::any& RowAggregation::strTypeId(typeStr); + KeyStorage::KeyStorage(const RowGroup& keys, Row** tRow) : tmpRow(tRow), rg(keys) { RGData data(rg); @@ -691,7 +707,8 @@ RowAggregation::RowAggregation(const vector& rowAggGroupByCol RowAggregation::RowAggregation(const RowAggregation& rhs): fAggMapPtr(NULL), fRowGroupOut(NULL), fTotalRowCount(0), fMaxTotalRowCount(AGG_ROWGROUP_SIZE), - fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0) + fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0), + fRGContext(rhs.fRGContext) { //fGroupByCols.clear(); //fFunctionCols.clear(); @@ -756,7 +773,6 @@ void RowAggregation::addRowGroup(const RowGroup* pRows, vector& in { // this function is 
for threaded aggregation, which is for group by and distinct. // if (countSpecial(pRows)) - Row rowIn; pRows->initRow(&rowIn); @@ -790,7 +806,7 @@ void RowAggregation::setJoinRowGroups(vector* pSmallSideRG, RowGroup* } //------------------------------------------------------------------------------ -// For UDAF, we need to sometimes start a new context. +// For UDAF, we need to sometimes start a new fRGContext. // // This will be called any number of times by each of the batchprimitiveprocessor // threads on the PM and by multple threads on the UM. It must remain @@ -801,29 +817,29 @@ void RowAggregation::resetUDAF(uint64_t funcColID) // Get the UDAF class pointer and store in the row definition object. RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColID].get()); - // resetUDAF needs to be re-entrant. Since we're modifying the context object - // by creating a new userData, we need a local copy. The copy constructor - // doesn't copy userData. - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + // RowAggregation and it's functions need to be re-entrant which means + // each instance (thread) needs its own copy of the context object. + // Note: operator=() doesn't copy userData. + fRGContext = rowUDAF->fUDAFContext; // Call the user reset for the group userData. Since, at this point, // context's userData will be NULL, reset will generate a new one. 
mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->reset(&rgContext); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } fRow.setUserDataStore(fRowGroupOut->getRGData()->getUserDataStore()); - fRow.setUserData(rgContext, - rgContext.getUserDataSP(), - rgContext.getUserDataSize(), + fRow.setUserData(fRGContext, + fRGContext.getUserDataSP(), + fRGContext.getUserDataSize(), rowUDAF->fAuxColumnIndex); - rgContext.setUserData(NULL); // Prevents calling deleteUserData on the context. + fRGContext.setUserData(NULL); // Prevents calling deleteUserData on the fRGContext. } //------------------------------------------------------------------------------ @@ -873,7 +889,6 @@ void RowAggregation::initialize() } } - // Save the RowGroup data pointer fResultDataVec.push_back(fRowGroupOut->getRGData()); @@ -1658,10 +1673,11 @@ void RowAggregation::updateEntry(const Row& rowIn) { for (uint64_t i = 0; i < fFunctionCols.size(); i++) { - int64_t colIn = fFunctionCols[i]->fInputColumnIndex; - int64_t colOut = fFunctionCols[i]->fOutputColumnIndex; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[i]; + int64_t colIn = pFunctionCol->fInputColumnIndex; + int64_t colOut = pFunctionCol->fOutputColumnIndex; - switch (fFunctionCols[i]->fAggFunction) + switch (pFunctionCol->fAggFunction) { case ROWAGG_COUNT_COL_NAME: @@ -1675,7 +1691,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_MIN: case ROWAGG_MAX: case ROWAGG_SUM: - doMinMaxSum(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction); + doMinMaxSum(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; case ROWAGG_AVG: @@ -1692,7 +1708,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_BIT_OR: case ROWAGG_BIT_XOR: { - doBitOp(rowIn, 
colIn, colOut, fFunctionCols[i]->fAggFunction); + doBitOp(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; } @@ -1707,11 +1723,11 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); + RowUDAFFunctionCol* rowUDAF = dynamic_cast(pFunctionCol.get()); if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF); + doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF, i); } else { @@ -1725,7 +1741,7 @@ void RowAggregation::updateEntry(const Row& rowIn) { std::ostringstream errmsg; errmsg << "RowAggregation: function (id = " << - (uint64_t) fFunctionCols[i]->fAggFunction << ") is not supported."; + (uint64_t) pFunctionCol->fAggFunction << ") is not supported."; cerr << errmsg.str() << endl; throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); break; @@ -1997,131 +2013,142 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu } void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { - std::vector valsIn; - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupIn.getColTypes()[colIn]; - std::vector dataFlags; + int32_t paramCount = fRGContext.getParameterCount(); + // The vector of parameters to be sent to the UDAF + mcsv1sdk::ColumnDatum valsIn[paramCount]; + uint32_t dataFlags[paramCount]; - // Get the context for this rowGroup. Make a copy so we're thread safe. 
- mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); - - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - - if (isNull(&fRowGroupIn, rowIn, colIn) == true) + execplan::CalpontSystemCatalog::ColDataType colDataType; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + if (isNull(&fRowGroupIn, rowIn, colIn) == true) { - return; + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + { + return; + } + dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; + } + + colDataType = fRowGroupIn.getColTypes()[colIn]; + if (!fRGContext.isParamNull(i)) + { + switch (colDataType) + { + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + break; + } + + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UBIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + { + datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; + datum.columnData = rowIn.getDoubleField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + { + 
datum.dataType = execplan::CalpontSystemCatalog::FLOAT; + datum.columnData = rowIn.getFloatField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::TIME: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + { + datum.dataType = colDataType; + datum.columnData = rowIn.getStringField(colIn); + break; + } + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation " << fRGContext.getName() << + ": No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } } - flag |= mcsv1sdk::PARAM_IS_NULL; - } - - flags.push_back(flag); - rgContext.setDataFlags(&flags); - - mcsv1sdk::ColumnDatum datum; - - if (!rgContext.isParamNull(0)) - { - switch (colDataType) + // MCOL-1201: If there are multiple parameters, the next fFunctionCols + // will have the column used. By incrementing the funcColsIdx (passed by + // ref, we also increment the caller's index. 
+ if (fFunctionCols.size() > funcColsIdx + 1 + && fFunctionCols[funcColsIdx+1]->fAggFunction == ROWAGG_MULTI_PARM) { - case execplan::CalpontSystemCatalog::TINYINT: - case execplan::CalpontSystemCatalog::SMALLINT: - case execplan::CalpontSystemCatalog::MEDINT: - case execplan::CalpontSystemCatalog::INT: - case execplan::CalpontSystemCatalog::BIGINT: - case execplan::CalpontSystemCatalog::DECIMAL: - case execplan::CalpontSystemCatalog::UDECIMAL: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - datum.scale = fRowGroupIn.getScale()[colIn]; - datum.precision = fRowGroupIn.getPrecision()[colIn]; - break; - } - - case execplan::CalpontSystemCatalog::UTINYINT: - case execplan::CalpontSystemCatalog::USMALLINT: - case execplan::CalpontSystemCatalog::UMEDINT: - case execplan::CalpontSystemCatalog::UINT: - case execplan::CalpontSystemCatalog::UBIGINT: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DOUBLE: - case execplan::CalpontSystemCatalog::UDOUBLE: - { - datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; - datum.columnData = rowIn.getDoubleField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::FLOAT: - case execplan::CalpontSystemCatalog::UFLOAT: - { - datum.dataType = execplan::CalpontSystemCatalog::FLOAT; - datum.columnData = rowIn.getFloatField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DATE: - case execplan::CalpontSystemCatalog::DATETIME: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::TIME: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::CHAR: - case execplan::CalpontSystemCatalog::VARCHAR: - case 
execplan::CalpontSystemCatalog::TEXT: - case execplan::CalpontSystemCatalog::VARBINARY: - case execplan::CalpontSystemCatalog::CLOB: - case execplan::CalpontSystemCatalog::BLOB: - { - datum.dataType = colDataType; - datum.columnData = rowIn.getStringField(colIn); - break; - } - - default: - { - std::ostringstream errmsg; - errmsg << "RowAggregation " << rgContext.getName() << - ": No logic for data type: " << colDataType; - throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); - break; - } + ++funcColsIdx; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; + colIn = pFunctionCol->fInputColumnIndex; + colOut = pFunctionCol->fOutputColumnIndex; + } + else + { + break; } } - valsIn.push_back(datum); - // The intermediate values are stored in userData referenced by colAux. - rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setDataFlags(dataFlags); + fRGContext.setUserData(fRow.getUserData(colAux)); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->nextValue(&rgContext, valsIn); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -2218,6 +2245,7 @@ RowAggregationUM::RowAggregationUM(const RowAggregationUM& rhs) : fHasAvg(rhs.fHasAvg), fKeyOnHeap(rhs.fKeyOnHeap), fHasStatsFunc(rhs.fHasStatsFunc), + fHasUDAF(rhs.fHasUDAF), fExpression(rhs.fExpression), fTotalMemUsage(rhs.fTotalMemUsage), fRm(rhs.fRm), @@ -2419,7 +2447,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -2585,22 +2613,6 @@ void RowAggregationUM::calculateAvgColumns() // Sets the value 
from valOut into column colOut, performing any conversions. void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) { - static const static_any::any& charTypeId((char)1); - static const static_any::any& scharTypeId((signed char)1); - static const static_any::any& shortTypeId((short)1); - static const static_any::any& intTypeId((int)1); - static const static_any::any& longTypeId((long)1); - static const static_any::any& llTypeId((long long)1); - static const static_any::any& ucharTypeId((unsigned char)1); - static const static_any::any& ushortTypeId((unsigned short)1); - static const static_any::any& uintTypeId((unsigned int)1); - static const static_any::any& ulongTypeId((unsigned long)1); - static const static_any::any& ullTypeId((unsigned long long)1); - static const static_any::any& floatTypeId((float)1); - static const static_any::any& doubleTypeId((double)1); - static const std::string typeStr(""); - static const static_any::any& strTypeId(typeStr); - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; if (valOut.empty()) @@ -2609,6 +2621,179 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) return; } + int64_t intOut = 0; + uint64_t uintOut = 0; + float floatOut = 0.0; + double doubleOut = 0.0; + ostringstream oss; + std::string strOut; + + bool bSetSuccess = false; + switch (colDataType) + { + case execplan::CalpontSystemCatalog::BIT: + case execplan::CalpontSystemCatalog::TINYINT: + if (valOut.compatible(charTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(scharTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<1>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + if (valOut.compatible(shortTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<2>(intOut, colOut); + bSetSuccess = true; 
+ } + break; + + case execplan::CalpontSystemCatalog::INT: + if (valOut.compatible(uintTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(longTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<4>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + if (valOut.compatible(llTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<8>(intOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UTINYINT: + if (valOut.compatible(ucharTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<1>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + if (valOut.compatible(ushortTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<2>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UINT: + if (valOut.compatible(uintTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<4>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UBIGINT: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + if (valOut.compatible(floatTypeId)) + { + floatOut = valOut.cast(); + fRow.setFloatField(floatOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DOUBLE: + case 
execplan::CalpontSystemCatalog::UDOUBLE: + if (valOut.compatible(doubleTypeId)) + { + doubleOut = valOut.cast(); + fRow.setDoubleField(doubleOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setStringField(strOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setVarBinaryField(strOut, colOut); + bSetSuccess = true; + } + break; + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation: No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } + if (!bSetSuccess) + { + SetUDAFAnyValue(valOut, colOut); + } +} + +void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut) +{ + execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; + // This may seem a bit convoluted. Users shouldn't return a type // that they didn't set in mcsv1_UDAF::init(), but this // handles whatever return type is given and casts @@ -2814,7 +2999,7 @@ void RowAggregationUM::calculateUDAFColumns() continue; rowUDAF = dynamic_cast(fFunctionCols[i].get()); - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + fRGContext = rowUDAF->fUDAFContext; int64_t colOut = rowUDAF->fOutputColumnIndex; int64_t colAux = rowUDAF->fAuxColumnIndex; @@ -2826,26 +3011,26 @@ void RowAggregationUM::calculateUDAFColumns() fRowGroupOut->getRow(j, &fRow); // Turn the NULL flag off. We can't know NULL at this point - rgContext.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF evaluate function mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->evaluate(&rgContext, valOut); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); } - rgContext.setUserData(NULL); + fRGContext.setUserData(NULL); } } @@ -3116,54 +3301,60 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u { // For a NULL constant, call nextValue with NULL and then evaluate. bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. 
- rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } - +#if 0 + uint32_t dataFlags[fRGContext.getParameterCount()]; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + { + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + } +#endif // Turn the NULL and CONSTANT flags on. - std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a dummy datum - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = execplan::CalpontSystemCatalog::BIGINT; datum.columnData = 0; - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), 
logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3460,30 +3651,28 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData case ROWAGG_UDAF: { bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. - rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Turn the CONSTANT flags on. 
- std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a datum item for sending to UDAF - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = (CalpontSystemCatalog::ColDataType)colDataType; switch (colDataType) @@ -3567,27 +3756,27 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData break; } - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3806,7 +3995,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -4011,45 +4200,43 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance 
//------------------------------------------------------------------------------ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { static_any::any valOut; - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); // Get the user data boost::shared_ptr userData = rowIn.getUserData(colIn + 1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. - std::vector flags; - uint32_t flag = 0; + uint32_t flags[1]; + flags[0] = 0; if (!userData) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { return; } // Turn on NULL flags - flag |= mcsv1sdk::PARAM_IS_NULL; + flags[0] |= mcsv1sdk::PARAM_IS_NULL; } - flags.push_back(flag); - rgContext.setDataFlags(&flags); + fRGContext.setDataFlags(flags); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF subEvaluate method mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->subEvaluate(&rgContext, userData.get()); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userData.get()); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::IDBExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -4246,7 +4433,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index b6294f193..282f354fc 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -110,6 +110,9 @@ enum RowAggFunctionType // User Defined Aggregate Function ROWAGG_UDAF, + // If an Aggregate has more than one parameter, this will be used for parameters after the first + ROWAGG_MULTI_PARM, + // internal function type to avoid duplicate the work // handling ROWAGG_COUNT_NO_OP, ROWAGG_DUP_FUNCT and ROWAGG_DUP_AVG is a little different // ROWAGG_COUNT_NO_OP : count done by AVG, no need to copy @@ -583,7 +586,7 @@ protected: virtual void doAvg(const Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); - virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); @@ -660,6 +663,25 @@ 
protected: //need access to rowgroup storage holding the rows to hash & ==. friend class AggHasher; friend class AggComparator; + + // We need a separate copy for each thread. + mcsv1sdk::mcsv1Context fRGContext; + + // These are handy for testing the actual type of static_any for UDAF + static const static_any::any& charTypeId; + static const static_any::any& scharTypeId; + static const static_any::any& shortTypeId; + static const static_any::any& intTypeId; + static const static_any::any& longTypeId; + static const static_any::any& llTypeId; + static const static_any::any& ucharTypeId; + static const static_any::any& ushortTypeId; + static const static_any::any& uintTypeId; + static const static_any::any& ulongTypeId; + static const static_any::any& ullTypeId; + static const static_any::any& floatTypeId; + static const static_any::any& doubleTypeId; + static const static_any::any& strTypeId; }; //------------------------------------------------------------------------------ @@ -783,6 +805,9 @@ protected: // Sets the value from valOut into column colOut, performing any conversions. void SetUDAFValue(static_any::any& valOut, int64_t colOut); + // If the datatype returned by evaluate isn't what we expect, convert. + void SetUDAFAnyValue(static_any::any& valOut, int64_t colOut); + // calculate the UDAF function all rows received. UM only function. 
void calculateUDAFColumns(); @@ -877,7 +902,7 @@ protected: void doStatistics(const Row&, int64_t, int64_t, int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); - void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); bool countSpecial(const RowGroup* pRG) { return false; diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index e69ff4d88..01009e35a 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/udfsdk/allnull.cpp b/utils/udfsdk/allnull.cpp index b6b8d79da..247b9e28f 100644 --- a/utils/udfsdk/allnull.cpp +++ b/utils/udfsdk/allnull.cpp @@ -27,11 +27,11 @@ struct allnull_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode allnull::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { context->setUserDataSize(sizeof(allnull_data)); - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -52,8 +52,7 @@ mcsv1_UDAF::ReturnCode allnull::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { struct allnull_data* data = (struct allnull_data*)context->getUserData()->data; diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h index 86697b052..da17f5d6b 100644 
--- a/utils/udfsdk/allnull.h +++ b/utils/udfsdk/allnull.h @@ -103,7 +103,7 @@ public: * colTypes or wrong number of arguments. Else return * mcsv1_UDAF::SUCCESS. */ - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); /** * reset() @@ -138,7 +138,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() diff --git a/utils/udfsdk/avg_mode.cpp b/utils/udfsdk/avg_mode.cpp index f39b5e402..5429183d9 100644 --- a/utils/udfsdk/avg_mode.cpp +++ b/utils/udfsdk/avg_mode.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("avg_mode() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode avg_mode::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; @@ -187,8 +186,7 @@ mcsv1_UDAF::ReturnCode 
avg_mode::evaluate(mcsv1Context* context, static_any::any return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/avg_mode.h b/utils/udfsdk/avg_mode.h index 4f3442005..5722c5fea 100644 --- a/utils/udfsdk/avg_mode.h +++ b/utils/udfsdk/avg_mode.h @@ -18,7 +18,7 @@ /*********************************************************************** * $Id$ * -* mcsv1_UDAF.h +* avg_mode.h ***********************************************************************/ /** @@ -50,8 +50,8 @@ * is also used to describe the interface that is used for * either. */ -#ifndef HEADER_mode -#define HEADER_mode +#ifndef HEADER_avg_mode +#define HEADER_avg_mode #include #include @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index 349a642ec..ee08dcc07 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -36,6 +36,8 @@ UDAF_MAP UDAFMap::fm; #include "ssq.h" #include "median.h" #include "avg_mode.h" +#include "regr_avgx.h" +#include "avgx.h" UDAF_MAP& UDAFMap::getMap() { if (fm.size() > 0) @@ -52,6 +54,8 @@ UDAF_MAP& UDAFMap::getMap() fm["ssq"] = new ssq(); fm["median"] = new median(); fm["avg_mode"] = new avg_mode(); + fm["regr_avgx"] = new regr_avgx(); + fm["avgx"] = new avgx(); return fm; } @@ -115,8 +119,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const { // We don't test the per row data fields. They don't determine // if it's the same Context. - if (getName() != c.getName() - || fRunFlags != c.fRunFlags + if (getName() != c.getName() + ||fRunFlags != c.fRunFlags || fContextFlags != c.fContextFlags || fUserDataSize != c.fUserDataSize || fResultType != c.fResultType @@ -125,7 +129,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const || fStartFrame != c.fStartFrame || fEndFrame != c.fEndFrame || fStartConstant != c.fStartConstant - || fEndConstant != c.fEndConstant) + || fEndConstant != c.fEndConstant + || fParamCount != c.fParamCount) return false; return true; @@ -217,6 +222,7 @@ void mcsv1Context::serialize(messageqcpp::ByteStream& b) const b << (uint32_t)fEndFrame; b << fStartConstant; b << fEndConstant; + b << fParamCount; } void mcsv1Context::unserialize(messageqcpp::ByteStream& b) @@ -238,6 +244,7 @@ void mcsv1Context::unserialize(messageqcpp::ByteStream& b) fEndFrame = (WF_FRAME)frame; b >> fStartConstant; b >> fEndConstant; + b >> fParamCount; } void UserData::serialize(messageqcpp::ByteStream& bs) const diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h 
index d24852c28..df3f47649 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -77,6 +77,7 @@ #include "any.hpp" #include "calpontsystemcatalog.h" #include "wf_frame.h" +#include "my_decimal_limits.h" using namespace execplan; @@ -200,12 +201,8 @@ static uint64_t CONTEXT_IS_PM __attribute__ ((unused)) = 1 << 2; // Flags that describe the contents of a specific input parameter // These will be set in context->dataFlags for each method call by the framework. // User code shouldn't use these directly -static uint64_t PARAM_IS_NULL __attribute__ ((unused)) = 1; -static uint64_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; - -// shorthand for the list of columns in the call sent to init() -// first is the actual column name and second is the data type in Columnstore. -typedef std::vector >COL_TYPES; +static uint32_t PARAM_IS_NULL __attribute__ ((unused)) = 1; +static uint32_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; // This is the context class that is passed to all API callbacks // The framework potentially sets data here for each invocation of @@ -269,7 +266,9 @@ public: EXPORT bool isPM(); // Parameter refinement description accessors - // valid in nextValue and dropValue + + // How many actual parameters were entered. + // valid in all calls size_t getParameterCount() const; // Determine if an input parameter is NULL @@ -298,6 +297,7 @@ public: // This only makes sense if the return type is decimal, but should be set // to (0, -1) for other types if the inout is decimal. // valid in init() + // Set the scale to DECIMAL_NOT_SPECIFIED if you want a floating decimal. 
EXPORT bool setScale(int32_t scale); EXPORT bool setPrecision(int32_t precision); @@ -372,7 +372,7 @@ private: int32_t fResultscale; // For scale, the number of digits to the right of the decimal int32_t fResultPrecision; // The max number of digits allowed in the decimal value std::string errorMsg; - std::vector* dataFlags; // one entry for each parameter + uint32_t* dataFlags; // an integer array wirh one entry for each parameter bool* bInterrupted; // Gets set to true by the Framework if something happens WF_FRAME fStartFrame; // Is set to default to start, then modified by the actual frame in the call WF_FRAME fEndFrame; // Is set to default to start, then modified by the actual frame in the call @@ -380,6 +380,7 @@ private: int32_t fEndConstant; // for end frame WF_PRECEEDIMG or WF_FOLLOWING std::string functionName; mcsv1sdk::mcsv1_UDAF* func; + int32_t fParamCount; public: // For use by the framework @@ -394,13 +395,14 @@ public: EXPORT void clearContextFlag(uint64_t flag); EXPORT uint64_t getContextFlags() const; EXPORT uint32_t getUserDataSize() const; - EXPORT std::vector& getDataFlags(); - EXPORT void setDataFlags(std::vector* flags); + EXPORT uint32_t* getDataFlags(); + EXPORT void setDataFlags(uint32_t* flags); EXPORT void setInterrupted(bool interrupted); EXPORT void setInterrupted(bool* interrupted); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction(); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const; EXPORT boost::shared_ptr getUserDataSP(); + EXPORT void setParamCount(int32_t paramCount); }; // Since aggregate functions can operate on any data type, we use the following structure @@ -419,9 +421,10 @@ public: struct ColumnDatum { CalpontSystemCatalog::ColDataType dataType; // defined in calpontsystemcatalog.h - static_any::any columnData; + static_any::any columnData; // Not valid in init() uint32_t scale; // If dataType is a DECIMAL type uint32_t precision; // If dataType is a DECIMAL type + std::string alias; // Only filled in for init() ColumnDatum() : 
dataType(CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1) {}; }; @@ -466,7 +469,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes) = 0; + ColumnDatum* colTypes) = 0; /** * reset() @@ -501,8 +504,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn) = 0; + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn) = 0; /** * subEvaluate() @@ -579,8 +581,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() @@ -640,32 +641,32 @@ inline mcsv1Context::mcsv1Context() : fEndFrame(WF_CURRENT_ROW), fStartConstant(0), fEndConstant(0), - func(NULL) + func(NULL), + fParamCount(0) { } inline mcsv1Context::mcsv1Context(const mcsv1Context& rhs) : - fContextFlags(0), - fColWidth(0), - dataFlags(NULL), - bInterrupted(NULL), - func(NULL) + dataFlags(NULL) { copy(rhs); } inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs) { - fRunFlags = rhs.getRunFlags(); - fResultType = rhs.getResultType(); - fUserDataSize = rhs.getUserDataSize(); - fResultscale = rhs.getScale(); - fResultPrecision = rhs.getPrecision(); + fRunFlags = rhs.fRunFlags; + fContextFlags = rhs.fContextFlags; + fResultType = rhs.fResultType; + fUserDataSize = rhs.fUserDataSize; + fColWidth = rhs.fColWidth; + fResultscale = rhs.fResultscale; + fResultPrecision = rhs.fResultPrecision; rhs.getStartFrame(fStartFrame, fStartConstant); rhs.getEndFrame(fEndFrame, fEndConstant); - functionName = rhs.getName(); - bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference - func = rhs.func; + functionName = rhs.functionName; + bInterrupted = rhs.bInterrupted; // Multiple 
threads will use the same reference + func = rhs.func; + fParamCount = rhs.fParamCount; return *this; } @@ -675,11 +676,7 @@ inline mcsv1Context::~mcsv1Context() inline mcsv1Context& mcsv1Context::operator=(const mcsv1Context& rhs) { - fContextFlags = 0; - fColWidth = 0; dataFlags = NULL; - bInterrupted = NULL; - func = NULL; return copy(rhs); } @@ -753,16 +750,13 @@ inline bool mcsv1Context::isPM() inline size_t mcsv1Context::getParameterCount() const { - if (dataFlags) - return dataFlags->size(); - - return 0; + return fParamCount; } inline bool mcsv1Context::isParamNull(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_NULL; + return dataFlags[paramIdx] & PARAM_IS_NULL; return false; } @@ -770,7 +764,7 @@ inline bool mcsv1Context::isParamNull(int paramIdx) inline bool mcsv1Context::isParamConstant(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_CONSTANT; + return dataFlags[paramIdx] & PARAM_IS_CONSTANT; return false; } @@ -939,18 +933,22 @@ inline uint32_t mcsv1Context::getUserDataSize() const return fUserDataSize; } -inline std::vector& mcsv1Context::getDataFlags() +inline uint32_t* mcsv1Context::getDataFlags() { - return *dataFlags; + return dataFlags; } -inline void mcsv1Context::setDataFlags(std::vector* flags) +inline void mcsv1Context::setDataFlags(uint32_t* flags) { dataFlags = flags; } -inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, - std::vector& valsDropped) +inline void mcsv1Context::setParamCount(int32_t paramCount) +{ + fParamCount = paramCount; +} + +inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { return NOT_IMPLEMENTED; } diff --git a/utils/udfsdk/median.cpp b/utils/udfsdk/median.cpp index e32d721f1..9c7e72dc3 100644 --- a/utils/udfsdk/median.cpp +++ b/utils/udfsdk/median.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* 
colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("median() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode median::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; @@ -212,8 +211,7 @@ mcsv1_UDAF::ReturnCode median::evaluate(mcsv1Context* context, static_any::any& return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h index d64792461..142be6ba8 100644 --- a/utils/udfsdk/median.h +++ b/utils/udfsdk/median.h @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. 
*/ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/ssq.cpp b/utils/udfsdk/ssq.cpp index 4d9ef7e10..20fdc33db 100644 --- a/utils/udfsdk/ssq.cpp +++ b/utils/udfsdk/ssq.cpp @@ -34,9 +34,9 @@ struct ssq_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -44,13 +44,13 @@ mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("ssq() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -81,8 +81,7 @@ mcsv1_UDAF::ReturnCode ssq::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; @@ -183,8 +182,7 @@ mcsv1_UDAF::ReturnCode ssq::evaluate(mcsv1Context* context, static_any::any& val return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode 
ssq::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h index 514c7a3f0..2cac61c2c 100644 --- a/utils/udfsdk/ssq.h +++ b/utils/udfsdk/ssq.h @@ -114,7 +114,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -147,8 +147,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -224,8 +223,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); protected: }; diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp index 981651c43..dc0277ccc 100644 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -490,6 +490,168 @@ extern "C" // return data->sumsq; return 0; } + +//======================================================================= + + /** + * regr_avgx connector stub + */ + struct regr_avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_avgx_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_avgx() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + 
data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x and y + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } + +//======================================================================= + + /** + * avgx connector stub. Exactly the same functionality as the + * built in avg() function. 
Use to test the performance of the + * API + */ + struct avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct avgx_data* data; + if (args->arg_count != 1) + { + strcpy(message,"avgx() requires one argument"); + return 1; + } + + if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x and y + struct avgx_data* data = (struct avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } } // vim:ts=4 sw=4: diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index 664b0e7de..fe1f3fd0e 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -204,8 +204,10 @@ Filters="*.c;*.C;*.cc;*.cpp;*.cp;*.cxx;*.c++;*.prg;*.pas;*.dpr;*.asm;*.s;*.bas;*.java;*.cs;*.sc;*.e;*.cob;*.html;*.rc;*.tcl;*.py;*.pl;*.d"> + + @@ -215,8 +217,10 @@ 
Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + + diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index f302c49cd..5cd5243c5 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -52,6 +52,7 @@ using namespace joblist; namespace windowfunction { + template boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) { @@ -142,7 +143,7 @@ template void WF_udaf::resetData() { getContext().getFunction()->reset(&getContext()); - fSet.clear(); + fDistinctSet.clear(); WindowFunctionType::resetData(); } @@ -150,8 +151,8 @@ template void WF_udaf::parseParms(const std::vector& parms) { bRespectNulls = true; - // parms[1]: respect null | ignore null - ConstantColumn* cc = dynamic_cast(parms[1].get()); + // The last parms: respect null | ignore null + ConstantColumn* cc = dynamic_cast(parms[parms.size()-1].get()); idbassert(cc != NULL); bool isNull = false; // dummy, harded coded bRespectNulls = (cc->getIntVal(fRow, isNull) > 0); @@ -167,52 +168,71 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) } mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - uint64_t colOut = fFieldIndex[0]; - uint64_t colIn = fFieldIndex[1]; - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. 
+ getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); + + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } for (int64_t i = b; i < e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; + bool bHasNull = false; fRow.setData(getPointer(fRowData->at(i))); // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; + uint32_t flags[getContext().getParameterCount()]; - if (fRow.isNullValue(colIn) == true) + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) { - continue; + if (!bRespectNulls) + { + bHasNull = true; + break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - flag |= mcsv1sdk::PARAM_IS_NULL; + T valIn; + getValue(colIn, valIn, &datum.dataType); + + // Check for distinct, if turned on. + // Currently, distinct only works for param 1 + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. - // TODO: when we impliment distinct, we need to revist this. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) + if (bHasNull) { continue; } - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - rc = getContext().getFunction()->dropValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED) @@ -442,59 +462,191 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) else if (fPrev <= e && fPrev > c) e = c; - uint64_t colIn = fFieldIndex[1]; + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. + getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } if (b <= c && c <= e) getContext().setContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); else getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); - + bool bHasNull = false; for (int64_t i = b; i <= e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; fRow.setData(getPointer(fRowData->at(i))); - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - if (fRow.isNullValue(colIn) == true) + // NULL flags + uint32_t flags[getContext().getParameterCount()]; + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + + // Turn on Null flags or skip based on respect nulls + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) + { + if (!bRespectNulls) + { + bHasNull = true; + 
break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; + } + + // MCOL-1201 Multi-Paramter calls + switch (datum.dataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + { + int64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::UDECIMAL: + { + uint64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + double valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + float valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[i] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } + } + // Skip if any value is NULL and respect nulls is off. + if (bHasNull) { continue; } - - flag |= mcsv1sdk::PARAM_IS_NULL; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) - { - continue; - } - - if (fDistinct) - fSet.insert(valIn); - - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - + getContext().setDataFlags(flags); + rc = getContext().getFunction()->nextValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index babb32565..f7a4c4b08 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -21,13 +21,35 @@ #ifndef UTILS_WF_UDAF_H #define UTILS_WF_UDAF_H -#include +#ifndef _MSC_VER +#include +#else +#include +#endif #include "windowfunctiontype.h" #include "mcsv1_udaf.h" namespace windowfunction { +// Hash classes for the distinct hashmap +class DistinctHasher +{ +public: + inline size_t operator()(const static_any::any& a) const + { + return a.getHash(); + } +}; + +class DistinctEqual +{ +public: + inline bool operator()(const static_any::any& lhs, static_any::any& rhs) const + { + return lhs == rhs; + } +}; // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF @@ -72,7 +94,8 @@ protected: bool fDistinct; bool bRespectNulls; // respect null | ignore null bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue. 
- std::set fSet; // To hold distinct values + // To hold distinct values + std::tr1::unordered_set fDistinctSet; static_any::any fValOut; // The return value public: diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index 950045899..4c5b4de32 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -492,10 +492,10 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) static uint64_t dateNull = joblist::DATENULL; static uint64_t datetimeNull = joblist::DATETIMENULL; static uint64_t timeNull = joblist::TIMENULL; - static uint64_t char1Null = joblist::CHAR1NULL; - static uint64_t char2Null = joblist::CHAR2NULL; - static uint64_t char4Null = joblist::CHAR4NULL; - static uint64_t char8Null = joblist::CHAR8NULL; +// static uint64_t char1Null = joblist::CHAR1NULL; +// static uint64_t char2Null = joblist::CHAR2NULL; +// static uint64_t char4Null = joblist::CHAR4NULL; +// static uint64_t char8Null = joblist::CHAR8NULL; static string stringNull(""); void* v = NULL; diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 5d3dfec85..41c788693 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1280,7 +1280,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -2024,10 +2024,10 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 0; k < colStructList.size(); k++) + for (size_t k = 0; k < colStructList.size(); k++) { // Skip the selected column 
- if (k == colId) + if (k == (size_t)colId) continue; Column expandCol; @@ -2582,7 +2582,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -3277,7 +3277,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; From 956db53dfd270e8d6b701f8c32de11476adc4ca6 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:53:49 -0500 Subject: [PATCH 06/19] MCOL-1201 Add test UDAF back in after rebase --- utils/udfsdk/avgx.cpp | 257 +++++++++++++++++++++++++++++++++++ utils/udfsdk/avgx.h | 99 ++++++++++++++ utils/udfsdk/regr_avgx.cpp | 270 +++++++++++++++++++++++++++++++++++++ utils/udfsdk/regr_avgx.h | 99 ++++++++++++++ 4 files changed, 725 insertions(+) create mode 100644 utils/udfsdk/avgx.cpp create mode 100644 utils/udfsdk/avgx.h create mode 100644 utils/udfsdk/regr_avgx.cpp create mode 100644 utils/udfsdk/regr_avgx.h diff --git a/utils/udfsdk/avgx.cpp b/utils/udfsdk/avgx.cpp new file mode 100644 index 000000000..887a8418e --- /dev/null +++ b/utils/udfsdk/avgx.cpp @@ -0,0 +1,257 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include +#include "avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 1) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with other than 1 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[0].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[0].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode avgx::reset(mcsv1Context* context) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_x = valsIn[0].columnData; + struct avgx_data* data = (struct 
avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct avgx_data* outData = (struct avgx_data*)context->getUserData()->data; + struct avgx_data* inData = (struct avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + + valOut = data->sum / (double)data->cnt; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_x = valsDropped[0].columnData; + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/avgx.h b/utils/udfsdk/avgx.h new file mode 100644 index 000000000..0569b6091 --- /dev/null +++ b/utils/udfsdk/avgx.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* avgx.h +***********************************************************************/ + +/** + * Columnstore interface for for the avgx function + * + * + * CREATE AGGREGATE FUNCTION avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_avgx +#define HEADER_avgx + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+ +// Return the avgx value of the dataset + +class avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + avgx() : mcsv1_UDAF() {}; + virtual ~avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_.h + diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/udfsdk/regr_avgx.cpp new file mode 100644 index 000000000..c7cc5b56e --- /dev/null +++ b/utils/udfsdk/regr_avgx.cpp @@ -0,0 +1,270 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include +#include +#include +#include "regr_avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct regr_avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode regr_avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 2) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with other than 2 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(regr_avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[1].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode regr_avgx::reset(mcsv1Context* context) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_y = valsIn[0].columnData; + static_any::any& valIn_x = valsIn[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (context->isParamNull(0) || context->isParamNull(1)) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct regr_avgx_data* outData = (struct regr_avgx_data*)context->getUserData()->data; + struct regr_avgx_data* inData = (struct regr_avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + + if (data->cnt == 0) + { + valOut = 0; + } + else + { + valOut = data->sum / (double)data->cnt; + } + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_y = valsDropped[0].columnData; + static_any::any& valIn_x = valsDropped[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/regr_avgx.h b/utils/udfsdk/regr_avgx.h new file mode 100644 index 000000000..f70f30d8c --- /dev/null +++ b/utils/udfsdk/regr_avgx.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* regr_avgx.h +***********************************************************************/ + +/** + * Columnstore interface for for the regr_avgx function + * + * + * CREATE AGGREGATE FUNCTION regr_avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_regr_avgx +#define HEADER_regr_avgx + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+ +// Return the regr_avgx value of the dataset + +class regr_avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + regr_avgx() : mcsv1_UDAF() {}; + virtual ~regr_avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_regr_avgx.h + From 40aca95a7ac83e00e7e12187d94910a2fba32fd8 Mon Sep 17 00:00:00 2001 From: David Hall Date: Mon, 14 May 2018 17:28:24 -0500 Subject: [PATCH 07/19] MCOL-1201 some fixes from testing --- dbcon/joblist/tupleaggregatestep.cpp | 229 ++++++++++++--------------- dbcon/mysql/ha_calpont_execplan.cpp | 1 - utils/common/common.vpj | 2 + utils/rowgroup/rowaggregation.cpp | 4 +- 4 files changed, 106 insertions(+), 130 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index ff490da5b..8f7755ad9 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -852,7 +852,6 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) if (ac->aggOp() == ROWAGG_UDAF) { UDAFColumn* udafc = dynamic_cast(ac); - if (udafc) { constAggDataVec.push_back( @@ -1097,8 +1096,9 @@ void TupleAggregateStep::prep1PhaseAggregate( vector functionVec; uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); + // For UDAF uint32_t projColsUDAFIndex = 0; - + UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function map avgFuncMap; @@ -1287,12 +1287,10 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { 
std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -1300,12 +1298,10 @@ void TupleAggregateStep::prep1PhaseAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, i)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -1474,8 +1470,6 @@ void TupleAggregateStep::prep1PhaseAggregate( throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } - pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); - // Return column oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(key); @@ -1677,8 +1671,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); // map key = column key, operation (enum), and UDAF pointer if UDAF. 
AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; set avgSet; + + // fOR udaf + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; uint32_t projColsUDAFIndex = 0; // for count column of average function @@ -1847,7 +1844,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; if (udafc) @@ -1857,12 +1854,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAgg)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -2142,6 +2137,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { + udafc = NULL; pUDAFFunc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); @@ -2150,10 +2146,21 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } if 
(find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -2473,26 +2480,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); } else { @@ -2904,7 +2892,10 @@ void TupleAggregateStep::prep2PhasesAggregate( vector > aggColVec; set avgSet; vector >& returnedColVec = jobInfo.returnedColVec; + // For UDAF uint32_t projColsUDAFIndex = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2947,7 +2938,6 @@ void TupleAggregateStep::prep2PhasesAggregate( uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns @@ -3084,12 +3074,10 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3098,10 +3086,9 @@ void 
TupleAggregateStep::prep2PhasesAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -3350,10 +3337,6 @@ void TupleAggregateStep::prep2PhasesAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { - // Keep a count of the parms after the first for any aggregate. - // These will be skipped and the count needs to be subtracted - // from where the aux column will be. - int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap; @@ -3369,6 +3352,8 @@ void TupleAggregateStep::prep2PhasesAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { uint32_t retKey = returnedColVec[i].first; @@ -3379,19 +3364,30 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_MULTI_PARM) { // Skip on UM: Extra parms for an aggregate have no work on the UM - ++multiParms; continue; } + // Is this a UDAF? use the function as part of the key. 
- - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; - + pUDAFFunc = NULL; + udafc = NULL; if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); @@ -3492,7 +3488,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep2PhasesAggregate: " << emsg << " oid=" @@ -3514,7 +3510,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3525,7 +3521,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.distinctColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3534,7 +3530,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + 
ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -3542,30 +3538,11 @@ void TupleAggregateStep::prep2PhasesAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3600,6 +3577,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnedColVec[i].second == AggregateColumn::AVG) avgFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // now fix the AVG function, locate the count(column) position @@ -3617,7 +3595,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size() - multiParms; + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3724,7 +3702,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector > aggColVec, aggNoDistColVec; set avgSet, avgDistSet; vector >& returnedColVec = jobInfo.returnedColVec; + // For UDAF uint32_t projColsUDAFIndex = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -3796,7 
+3777,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns @@ -3940,12 +3920,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3954,10 +3932,9 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -4201,32 +4178,33 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // associate the columns between the aggregate RGs on PM and UM without distinct aggregator // populated the returned columns { + int64_t multiParms = 0; + for (uint32_t idx = 0; idx < groupByPm.size(); idx++) { SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(idx, idx)); groupByUm.push_back(groupby); } - // Keep a count of the parms after the first for any aggregate. - // These will be skipped and the count needs to be subtracted - // from where the aux column will be. 
- int64_t multiParms = 0; for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) - { SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; - // UDAF support if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) { - // Multi-Parm is not used on the UM + // Skip on UM: Extra parms for an aggregate have no work on the UM ++multiParms; continue; } + if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); + if (!udafFuncCol) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + } funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, @@ -4273,6 +4251,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // These will be skipped and the count needs to be subtracted // from where the aux column will be. int64_t multiParms = 0; + projColsUDAFIndex = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. 
map avgFuncMap, avgDistFuncMap; @@ -4286,9 +4265,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { pUDAFFunc = NULL; + udafc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); @@ -4304,10 +4286,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -4436,7 +4429,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second - multiParms; + colUm = it->second; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); @@ -4460,7 +4453,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - colUm = it->second - multiParms; + colUm = it->second; if (aggOp == ROWAGG_SUM) { @@ -4528,7 +4521,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, 
args); cerr << "prep2PhasesDistinctAggregate: " << emsg << " oid=" @@ -4552,7 +4545,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -4561,7 +4554,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -4569,30 +4562,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. 
- funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4629,6 +4603,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -4646,7 +4621,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size() - multiParms; + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4706,7 +4681,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(5)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 5c1989d51..7ee6a775b 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4573,7 +4573,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) udafc->resultType(ct); } } - } catch (std::logic_error e) { diff --git a/utils/common/common.vpj b/utils/common/common.vpj index 69059884c..ea67e04ba 100755 --- a/utils/common/common.vpj +++ 
b/utils/common/common.vpj @@ -200,6 +200,7 @@ + @@ -208,6 +209,7 @@ Name="Header Files" Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index c1f5bbd63..043dcaac2 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -2015,13 +2015,13 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { - int32_t paramCount = fRGContext.getParameterCount(); + uint32_t paramCount = fRGContext.getParameterCount(); // The vector of parameters to be sent to the UDAF mcsv1sdk::ColumnDatum valsIn[paramCount]; uint32_t dataFlags[paramCount]; execplan::CalpontSystemCatalog::ColDataType colDataType; - for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + for (uint32_t i = 0; i < paramCount; ++i) { mcsv1sdk::ColumnDatum& datum = valsIn[i]; // Turn on NULL flags From bac8c2d43b1e623cfb4509fecd944824e1c4fb9f Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 15 May 2018 13:15:45 -0500 Subject: [PATCH 08/19] MCOL-1201 Modify docs. 
Fix group concat bug --- dbcon/mysql/ha_calpont_execplan.cpp | 1 + utils/udfsdk/docs/source/changelog.rst | 1 + .../docs/source/reference/ColumnDatum.rst | 6 ++-- .../docs/source/reference/MariaDBUDAF.rst | 2 +- .../udfsdk/docs/source/reference/UDAFMap.rst | 2 +- .../docs/source/reference/mcsv1Context.rst | 2 +- .../docs/source/reference/mcsv1_UDAF.rst | 36 ++++++++----------- utils/udfsdk/docs/source/usage/cmakelists.rst | 2 +- utils/udfsdk/docs/source/usage/compile.rst | 2 +- utils/udfsdk/docs/source/usage/headerfile.rst | 6 ++-- .../udfsdk/docs/source/usage/introduction.rst | 4 +-- utils/udfsdk/docs/source/usage/sourcefile.rst | 29 +++++++-------- utils/udfsdk/udfsdk.vpj | 33 +++++++++++++++++ 13 files changed, 75 insertions(+), 51 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 7ee6a775b..395d24404 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4165,6 +4165,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) rowCol->columnVec(selCols); (dynamic_cast(ac))->orderCols(orderCols); parm.reset(rowCol); + ac->aggParms().push_back(parm); if (gc->str_separator()) { diff --git a/utils/udfsdk/docs/source/changelog.rst b/utils/udfsdk/docs/source/changelog.rst index fcd93d54c..1a7c749f9 100644 --- a/utils/udfsdk/docs/source/changelog.rst +++ b/utils/udfsdk/docs/source/changelog.rst @@ -5,4 +5,5 @@ Version History | Version | Date | Changes | +=========+============+=============================+ | 1.1.0α | 2017-08-25 | - First alpha release | +| 1.2.0α | 2016-05-18 | - Add multi parm support | +---------+------------+-----------------------------+ diff --git a/utils/udfsdk/docs/source/reference/ColumnDatum.rst b/utils/udfsdk/docs/source/reference/ColumnDatum.rst index dd1006363..5304a2953 100644 --- a/utils/udfsdk/docs/source/reference/ColumnDatum.rst +++ b/utils/udfsdk/docs/source/reference/ColumnDatum.rst @@ -1,3 +1,5 @@ +.. 
_ColumnDatum: + ColumnDatum =========== @@ -13,7 +15,7 @@ Example for int data: int myint = valIn.cast(); -For multi-paramter aggregations (not available in Columnstore 1.1), the colsIn vector of next_value() contains the ordered set of row parameters. +For multi-paramter aggregations (not available in Columnstore 1.1), the colsIn array of next_value() contains the ordered set of row parameters. For char, varchar, text, varbinary and blob types, columnData will be std::string. @@ -59,7 +61,7 @@ The provided values are: * - SMALLINT - A signed two byte integer * - DECIMAL - - A Columnstore Decimal value. For Columnstore 1.1, this is stored in the smallest integer type field that will hold the required precision. + - A Columnstore Decimal value. This is stored in the smallest integer type field that will hold the required precision. * - MEDINT - A signed four byte integer * - INT diff --git a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst index 1f6fa7acb..d031705d8 100644 --- a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst +++ b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst @@ -13,7 +13,7 @@ The library placed in mysql/lib is the name you use in the SQL CREATE AGGREGATE CREATE AGGREGATE FUNCTION ssq returns REAL soname 'libudf_mysql.so'; -Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures. If writing your function for other engines, you must handle allocation and de-alloaction in :ref:`function_init ` and :ref:`function_deinit ` +Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures in other engines. 
If writing your function for other engines, you must handle allocation and de-alloaction in :ref:`function_init ` and :ref:`function_deinit ` All of the MariaDB UDF and UDAF example functions are in a single source file named udfmysql.cpp and linked into libudf_mysql.so. diff --git a/utils/udfsdk/docs/source/reference/UDAFMap.rst b/utils/udfsdk/docs/source/reference/UDAFMap.rst index 48706bab3..d3cda63f4 100644 --- a/utils/udfsdk/docs/source/reference/UDAFMap.rst +++ b/utils/udfsdk/docs/source/reference/UDAFMap.rst @@ -3,7 +3,7 @@ UDAFMap ======= -The UDAFMap is where we tell the system about our function. For Columnstore 1.1, you must manually place your function into this map. +The UDAFMap is where we tell the system about our function. For Columnstore 1.2, you must manually place your function into this map. * open mcsv1_udaf.cpp * add your header to the #include list diff --git a/utils/udfsdk/docs/source/reference/mcsv1Context.rst b/utils/udfsdk/docs/source/reference/mcsv1Context.rst index 279220fb3..02adf57ab 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1Context.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1Context.rst @@ -150,7 +150,7 @@ Use these to determine the way your UDA(n)F was called .. c:function:: size_t getParameterCount() const; -:returns: the number of parameters to the function in the SQL query. Columnstore 1.1 only supports one parameter. +:returns: the number of parameters to the function in the SQL query. .. c:function:: bool isParamNull(int paramIdx); diff --git a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst index 73c8f6570..f75fe73fc 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst @@ -1,4 +1,4 @@ -.. _ mcsv1_udaf: +.. _mcsv1_udaf: mcsv1_UDAF ========== @@ -11,12 +11,14 @@ The base class has no data members. It is designed to be only a container for yo However, adding static const members makes sense. 
-For UDAF (not Wndow Functions) Aggregation takes place in three stages: +For UDAF (not Window Functions) Aggregation takes place in three stages: * Subaggregation on the PM. nextValue() * Consolodation on the UM. subevaluate() * Evaluation of the function on the UM. evaluate() +There are situations where the system makes a choice to perform all UDAF calculations on the UM. The presence of group_concat() in the query and certain joins can cause the optimizer to make this choice. + For Window Functions, all aggregation occurs on the UM, and thus the subevaluate step is skipped. There is an optional dropValue() function that may be added. * Aggregation on the UM. nextValue() @@ -80,17 +82,11 @@ Callback Methods .. _init: -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. - - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. +:param colTypes: A list of ColumnDatum structures. Use this to access the column types of the parameters. colTypes.columnData will be invalid. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -116,25 +112,23 @@ Callback Methods .. _nextvalue: -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. - +:param valsIn: an array representing the values to be added for each parameter for this row. 
+ :returns: ReturnCode::ERROR or ReturnCode::SUCCESS Use context->getUserData() and type cast it to your UserData type or Simple Data Model stuct. nextValue() is called for each Window movement that passes the WHERE and HAVING clauses. The context's UserData will contain values that have been sub-aggregated to this point for the group, partition or Window Frame. nextValue is called on the PM for aggregation and on the UM for Window Functions. - When used in an aggregate, the function may not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. + When used in an aggregate, the function should not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. - When used as a analytic function (Window Function), nextValue is call for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. + When used as a analytic function (Window Function), nextValue is called for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. - Since this is called for every row, it is important that this method be efficient. + Since this may called for every row, it is important that this method be efficient. .. _subevaluate: @@ -172,13 +166,11 @@ Callback Methods .. _dropvalue: -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call -:param valsDropped: a vector representing the values to be dropped for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. 
+:param valsDropped: an array representing the values to be dropped for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS diff --git a/utils/udfsdk/docs/source/usage/cmakelists.rst b/utils/udfsdk/docs/source/usage/cmakelists.rst index 32a218459..a7ddacbaf 100644 --- a/utils/udfsdk/docs/source/usage/cmakelists.rst +++ b/utils/udfsdk/docs/source/usage/cmakelists.rst @@ -3,7 +3,7 @@ CMakeLists.txt ============== -For Columnstore 1.1, you compile your function by including it in the CMakeLists.txt file for the udfsdk. +For Columnstore 1.2, you compile your function by including it in the CMakeLists.txt file for the udfsdk. You need only add the new .cpp files to the udfsdk_LIB_SRCS target list:: diff --git a/utils/udfsdk/docs/source/usage/compile.rst b/utils/udfsdk/docs/source/usage/compile.rst index e6319e45b..b96af5d80 100644 --- a/utils/udfsdk/docs/source/usage/compile.rst +++ b/utils/udfsdk/docs/source/usage/compile.rst @@ -3,7 +3,7 @@ Compile ======= -To compile your function for Columnstore 1.1, simple recompile the udfsdk directory:: +To compile your function for Columnstore 1.2, simply recompile the udfsdk directory:: cd utils/usdsdk cmake . diff --git a/utils/udfsdk/docs/source/usage/headerfile.rst b/utils/udfsdk/docs/source/usage/headerfile.rst index 720acc5be..afb043e98 100644 --- a/utils/udfsdk/docs/source/usage/headerfile.rst +++ b/utils/udfsdk/docs/source/usage/headerfile.rst @@ -5,7 +5,7 @@ Header file Usually, each UDA(n)F function will have one .h and one .cpp file plus code for the mariadb UDAF plugin which may or may not be in a separate file. It is acceptable to put a set of related functions in the same files or use separate files for each. -The easiest way to create these files is to copy them an example closest to the type of function you intend to create. +The easiest way to create these files is to copy them from an example closest to the type of function you intend to create. 
Your header file must have a class defined that will implement your function. This class must be derived from mcsv1_UDAF and be in the mcsv1sdk namespace. The following examples use the "allnull" UDAF. @@ -29,9 +29,9 @@ allnull uses the Simple Data Model. See :ref:`complexdatamodel` to see how that allnull() : mcsv1_UDAF(){}; virtual ~allnull(){}; - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); virtual ReturnCode reset(mcsv1Context* context); - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn); virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); }; diff --git a/utils/udfsdk/docs/source/usage/introduction.rst b/utils/udfsdk/docs/source/usage/introduction.rst index 6b3544a1e..19c612caa 100644 --- a/utils/udfsdk/docs/source/usage/introduction.rst +++ b/utils/udfsdk/docs/source/usage/introduction.rst @@ -3,7 +3,7 @@ mcsv1_udaf Introduction mcsv1_udaf is a C++ API for writing User Defined Aggregate Functions (UDAF) and User Defined Analytic Functions (UDAnF) for the MariaDB Columstore engine. -In Columnstore 1.1.0, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. +In Columnstore 1.2, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. The API has a number of features. The general theme is, there is a class that represents the function, there is a context under which the function operates, and there is a data store for intermediate values. @@ -18,5 +18,5 @@ The steps required to create a function are: * :ref:`Compile udfsdk `. * :ref:`Copy the compiled libraries ` to the working directories. 
-In 1.1.0, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. +In 1.2, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. diff --git a/utils/udfsdk/docs/source/usage/sourcefile.rst b/utils/udfsdk/docs/source/usage/sourcefile.rst index b7ed38a32..5c43f29e4 100644 --- a/utils/udfsdk/docs/source/usage/sourcefile.rst +++ b/utils/udfsdk/docs/source/usage/sourcefile.rst @@ -34,21 +34,17 @@ Or, if using the :ref:`complexdatamodel`, type cast the UserData to your UserDat init() ------ -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. +:param colTypes: A list of the ColumnDatum used to access column types of the parameters. In init(), the columnData member is invalid. - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - see :ref:`ColDataTypes `. In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. + see :ref:`ColumnDatum`. In Columnstore 1.2, An arbitrary number of parameters is supported. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS -The init() method is where you sanity check the input, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. +The init() method is where you sanity check the input datatypes, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. init() is the exception to type casting the UserData member of context. 
UserData has not been created when init() is called, so you shouldn't use it here. @@ -60,13 +56,14 @@ If you're using :ref:`simpledatamodel`, you need to set the size of the structur .. rubric:: Check parameter count and type -Each function expects a certain number of columns to entered as parameters in the SQL query. For columnstore 1.1, the number of parameters is limited to one. +Each function expects a certain number of columns to be entered as parameters in the SQL query. It is possible to create a UDAF that accepts a variable number of parameters. You can discover which ones were actually used in init(), and modify your function's behavior accordingly. -colTypes is a vector of each parameter name and type. The name is the colum name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your functions behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +colTypes is an array of ColumnData from which can be gleaned the type and name. The name is the column name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your functions behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +The actual number of paramters passed can be gotten from context->getParameterCount(). :: - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -84,7 +81,7 @@ When you create your function using the SQL CREATE FUNCTION command, you must in .. 
rubric:: Set width and scale -If you have secial requirements, especially if you might be dealing with decimal types:: +If you have special requirements, especially if you might be dealing with decimal types:: context->setColWidth(8); context->setScale(context->getScale()*2); @@ -117,13 +114,11 @@ This function may be called multiple times from both the UM and the PM. Make no nextValue() ----------- -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. +:param valsIn: an array representing the values to be added for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -208,7 +203,7 @@ For AVG, you might see:: dropValue --------- -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. 
c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index fe1f3fd0e..3d3ac39ca 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -238,5 +238,38 @@ N="Makefile" Type="Makefile"/> + + + + + + + + + + + + + + + + + + + + + + + + + + + From 0f617896d9b6fd82ef23dd5901133f1661e19866 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 25 May 2018 12:56:29 -0500 Subject: [PATCH 09/19] MCOL-1201 Add support for UDAF multiple parm constants --- dbcon/execplan/constantcolumn.h | 2 + dbcon/joblist/jlf_common.cpp | 2 +- dbcon/joblist/joblistfactory.cpp | 10 +- dbcon/joblist/tupleaggregatestep.cpp | 106 +++++-- dbcon/joblist/windowfunctionstep.cpp | 9 +- dbcon/mysql/ha_calpont_execplan.cpp | 38 ++- .../primproc/batchprimitiveprocessor.cpp | 16 +- utils/common/any.hpp | 7 +- utils/loggingcpp/errorcodes.cpp | 2 +- utils/messageqcpp/bytestream.h | 1 + utils/rowgroup/rowaggregation.cpp | 198 +++++++++---- utils/rowgroup/rowaggregation.h | 36 ++- utils/udfsdk/allnull.h | 1 - utils/udfsdk/avg_mode.h | 1 - utils/udfsdk/avgx.h | 1 - utils/udfsdk/mcsv1_udaf.h | 1 - utils/udfsdk/median.h | 1 - utils/udfsdk/regr_avgx.cpp | 6 +- utils/udfsdk/regr_avgx.h | 1 - utils/udfsdk/ssq.h | 1 - utils/udfsdk/udfsdk.vpj | 33 --- utils/windowfunction/wf_udaf.cpp | 276 +++++++++++------- utils/windowfunction/wf_udaf.h | 2 - utils/windowfunction/windowfunctiontype.cpp | 24 +- utils/windowfunction/windowfunctiontype.h | 7 +- 25 files changed, 508 insertions(+), 274 deletions(-) diff --git a/dbcon/execplan/constantcolumn.h b/dbcon/execplan/constantcolumn.h index 04098faae..be0731044 100644 --- a/dbcon/execplan/constantcolumn.h +++ b/dbcon/execplan/constantcolumn.h @@ -38,6 +38,8 @@ class ByteStream; */ namespace execplan { +class ConstantColumn; + /** * @brief A class to represent a constant return column * diff --git 
a/dbcon/joblist/jlf_common.cpp b/dbcon/joblist/jlf_common.cpp index f5dbeee17..4b1980d49 100644 --- a/dbcon/joblist/jlf_common.cpp +++ b/dbcon/joblist/jlf_common.cpp @@ -405,7 +405,7 @@ uint32_t getTupleKey(JobInfo& jobInfo, const SRCP& srcp, bool add) if (add) { - // setTupleInfo first if add is ture, ok if already set. + // setTupleInfo first if add is true, ok if already set. const SimpleColumn* sc = dynamic_cast(srcp.get()); if (sc != NULL) diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 4cf7bccc5..033bf2643 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -300,6 +300,7 @@ const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo) { const ArithmeticColumn* ac = NULL; const FunctionColumn* fc = NULL; + const ConstantColumn* cc = NULL; uint64_t eid = -1; CalpontSystemCatalog::ColType ct; ExpressionStep* es = new ExpressionStep(jobInfo); @@ -316,6 +317,11 @@ const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo) eid = fc->expressionId(); ct = fc->resultType(); } + else if ((cc = dynamic_cast(retCols[i].get())) != NULL) + { + eid = cc->expressionId(); + ct = cc->resultType(); + } else { std::ostringstream errmsg; @@ -1004,7 +1010,9 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo for (uint32_t parm = 0; parm < aggParms.size(); ++parm) { - if (aggc->constCol().get() != NULL) + // Only do the optimization of converting to count(*) if + // there is only one parameter. 
+ if (aggParms.size() == 1 && aggc->constCol().get() != NULL) { // replace the aggregate on constant with a count(*) SRCP clone; diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 8f7755ad9..491f86a8f 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -1097,7 +1097,8 @@ void TupleAggregateStep::prep1PhaseAggregate( uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); // For UDAF - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function @@ -1286,11 +1287,11 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -1477,6 +1478,14 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(udafFuncCol->fUDAFContext.getPrecision()); typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -1488,6 +1497,13 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = 
udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -1676,7 +1692,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // fOR udaf UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; // for count column of average function map avgFuncMap, avgDistFuncMap; @@ -1840,12 +1857,12 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { @@ -2071,6 +2088,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(CalpontSystemCatalog::UBIGINT); widthAgg.push_back(sizeof(uint64_t)); funct->fAuxColumnIndex = colAgg++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -2083,6 +2108,13 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(widthProj[colProj]); ++colAgg; + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -2133,7 +2165,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - projColsUDAFIndex = 0; + projColsUDAFIdx = 0; // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2146,11 +2178,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( 
if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -2893,7 +2925,8 @@ void TupleAggregateStep::prep2PhasesAggregate( set avgSet; vector >& returnedColVec = jobInfo.returnedColVec; // For UDAF - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; @@ -3073,11 +3106,11 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3305,6 +3338,14 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); widthAggPm.push_back(bigUintWidth); funct->fAuxColumnIndex = colAggPm++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -3317,6 +3358,13 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); colAggPm++; + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -3342,7 +3390,7 @@ void TupleAggregateStep::prep2PhasesAggregate( map 
avgFuncMap; AGG_MAP aggDupFuncMap; - projColsUDAFIndex = 0; + projColsUDAFIdx = 0; // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3372,12 +3420,12 @@ void TupleAggregateStep::prep2PhasesAggregate( udafc = NULL; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3703,7 +3751,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( set avgSet, avgDistSet; vector >& returnedColVec = jobInfo.returnedColVec; // For UDAF - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; @@ -3919,11 +3968,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -4147,6 +4196,14 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); widthAggPm.push_back(sizeof(uint64_t)); funct->fAuxColumnIndex = colAggPm++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -4160,6 +4217,13 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( 
widthAggPm.push_back(width[colProj]); multiParmIndexes.push_back(colAggPm); colAggPm++; + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -4251,7 +4315,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // These will be skipped and the count needs to be subtracted // from where the aux column will be. int64_t multiParms = 0; - projColsUDAFIndex = 0; + projColsUDAFIdx = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4286,11 +4350,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); diff --git a/dbcon/joblist/windowfunctionstep.cpp b/dbcon/joblist/windowfunctionstep.cpp index 4d24f0b4b..2a93f680b 100644 --- a/dbcon/joblist/windowfunctionstep.cpp +++ b/dbcon/joblist/windowfunctionstep.cpp @@ -569,6 +569,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) for (RetColsVector::iterator i = jobInfo.windowCols.begin(); i < jobInfo.windowCols.end(); i++) { + bool isUDAF = false; // window function type WindowFunctionColumn* wc = dynamic_cast(i->get()); uint64_t ridx = getColumnIndex(*i, colIndexMap, jobInfo); // result index @@ -590,6 +591,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // if (boost::iequals(wc->functionName(),"UDAF_FUNC") if (wc->functionName() == "UDAF_FUNC") { + isUDAF = true; ++wfsUserFunctionCount; } @@ -646,10 +648,13 @@ void 
WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // column type for functor templates int ct = 0; + if (isUDAF) + { + ct = wc->getUDAFContext().getResultType(); + } // make sure index is in range - if (fields.size() > 1 && fields[1] >= 0 && static_cast(fields[1]) < types.size()) + else if (fields.size() > 1 && fields[1] >= 0 && static_cast(fields[1]) < types.size()) ct = types[fields[1]]; - // workaround for functions using "within group (order by)" syntax string fn = boost::to_upper_copy(wc->functionName()); diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 701e1c14f..b02712409 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4206,8 +4206,8 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // treat as count(*) if (ac->aggOp() == AggregateColumn::COUNT) ac->aggOp(AggregateColumn::COUNT_ASTERISK); - - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + parm.reset(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError)); + ac->constCol(parm); break; } @@ -4485,17 +4485,20 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will // be applied in ExeMgr. When the ExeMgr fix is available, this checking // will be taken out. 
- if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + if (isp->sum_func() != Item_sum::UDF_SUM_FUNC) { - gwi.fatalParseError = true; - gwi.parseErrorText = "No project column found for aggregate function"; - if (ac) - delete ac; - return NULL; - } - else if (ac->constCol()) - { - gwi.count_asterisk_list.push_back(ac); + if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "No project column found for aggregate function"; + if (ac) + delete ac; + return NULL; + } + else if (ac->constCol()) + { + gwi.count_asterisk_list.push_back(ac); + } } // For UDAF, populate the context and call the UDAF init() function. @@ -7903,8 +7906,15 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i setError(gwi.thd, ER_INTERNAL_ERROR, gwi.parseErrorText, gwi); return ER_CHECK_NOT_IMPLEMENTED; } - - (*coliter)->aggParms().push_back(minSc); + // Replace the last (presumably constant) object with minSc + if ((*coliter)->aggParms().empty()) + { + (*coliter)->aggParms().push_back(minSc); + } + else + { + (*coliter)->aggParms()[0] = minSc; + } } std::vector::iterator funciter; diff --git a/primitives/primproc/batchprimitiveprocessor.cpp b/primitives/primproc/batchprimitiveprocessor.cpp index bc56a7430..019761d39 100644 --- a/primitives/primproc/batchprimitiveprocessor.cpp +++ b/primitives/primproc/batchprimitiveprocessor.cpp @@ -1677,15 +1677,11 @@ void BatchPrimitiveProcessor::execute() } catch (logging::QueryDataExcept& qex) { - ostringstream os; - os << qex.what() << endl; - writeErrorMsg(os.str(), qex.errorCode()); + writeErrorMsg(qex.what(), qex.errorCode()); } catch (logging::DictionaryBufferOverflow& db) { - ostringstream os; - os << db.what() << endl; - writeErrorMsg(os.str(), db.errorCode()); + writeErrorMsg(db.what(), db.errorCode()); } catch (scalar_exception& se) { @@ -1758,15 +1754,11 @@ void BatchPrimitiveProcessor::execute() } catch (IDBExcept& iex) { 
- ostringstream os; - os << iex.what() << endl; - writeErrorMsg(os.str(), iex.errorCode(), true, false); + writeErrorMsg(iex.what(), iex.errorCode(), true, false); } catch (const std::exception& ex) { - ostringstream os; - os << ex.what() << endl; - writeErrorMsg(os.str(), logging::batchPrimitiveProcessorErr); + writeErrorMsg(ex.what(), logging::batchPrimitiveProcessorErr); } catch (...) { diff --git a/utils/common/any.hpp b/utils/common/any.hpp index 5408c5c87..63d05d3d2 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -11,15 +11,12 @@ #include #include +#include namespace static_any { namespace anyimpl { - struct bad_any_cast - { - }; - struct empty_any { }; @@ -266,7 +263,7 @@ public: T& cast() { if (policy != anyimpl::get_policy()) - throw anyimpl::bad_any_cast(); + throw std::runtime_error("static_any: type mismatch in cast"); T* r = reinterpret_cast(policy->get_value(&object)); return *r; } diff --git a/utils/loggingcpp/errorcodes.cpp b/utils/loggingcpp/errorcodes.cpp index 60919c906..4b4196800 100644 --- a/utils/loggingcpp/errorcodes.cpp +++ b/utils/loggingcpp/errorcodes.cpp @@ -29,7 +29,7 @@ using namespace std; namespace logging { -ErrorCodes::ErrorCodes(): fErrorCodes(), fPreamble("An unexpected condition within the query caused an internal processing error within InfiniDB. Please check the log files for more details. Additional Information: ") +ErrorCodes::ErrorCodes(): fErrorCodes(), fPreamble("An unexpected condition within the query caused an internal processing error within Columnstore. Please check the log files for more details. 
Additional Information: ") { fErrorCodes[batchPrimitiveStepErr] = "error in BatchPrimitiveStep."; fErrorCodes[tupleBPSErr] = "error in TupleBPS."; diff --git a/utils/messageqcpp/bytestream.h b/utils/messageqcpp/bytestream.h index d1a3f4988..f8453843e 100644 --- a/utils/messageqcpp/bytestream.h +++ b/utils/messageqcpp/bytestream.h @@ -35,6 +35,7 @@ #include "exceptclasses.h" #include "serializeable.h" +#include "any.hpp" class ByteStreamTestSuite; diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 043dcaac2..6339554f1 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -1723,17 +1723,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(pFunctionCol.get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF, i); - } - else - { - throw logic_error("(3)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colOut + 1, i); break; } @@ -2012,31 +2002,60 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu fRow.setLongDoubleField(fRow.getLongDoubleField(colAux + 1) + valIn * valIn, colAux + 1); } -void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) +void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, + int64_t colAux, uint64_t& funcColsIdx) { uint32_t paramCount = fRGContext.getParameterCount(); // The vector of parameters to be sent to the UDAF mcsv1sdk::ColumnDatum valsIn[paramCount]; uint32_t dataFlags[paramCount]; - + ConstantColumn* cc; + bool bIsNull = false; execplan::CalpontSystemCatalog::ColDataType colDataType; for (uint32_t i = 0; i < paramCount; ++i) { + // If UDAF_IGNORE_NULLS is on, bIsNull gets set the first time + // we find a null. 
We still need to eat the rest of the parameters + // to sync updateEntry + if (bIsNull) + { + ++funcColsIdx; + continue; + } + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; // Turn on NULL flags dataFlags[i] = 0; - if (isNull(&fRowGroupIn, rowIn, colIn) == true) + + // If this particular parameter is a constant, then we need + // to access the constant value rather than a row value. + cc = NULL; + if (pFunctionCol->fpConstCol) + { + cc = dynamic_cast(pFunctionCol->fpConstCol.get()); + } + + if ((cc && cc->type() == ConstantColumn::NULLDATA) + || (!cc && isNull(&fRowGroupIn, rowIn, colIn) == true)) { if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { - return; + bIsNull = true; + ++funcColsIdx; + continue; } dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; } - - colDataType = fRowGroupIn.getColTypes()[colIn]; - if (!fRGContext.isParamNull(i)) + + if (cc) + { + colDataType = cc->resultType().colDataType; + } + else + { + colDataType = fRowGroupIn.getColTypes()[colIn]; + } + if (!(dataFlags[i] & mcsv1sdk::PARAM_IS_NULL)) { switch (colDataType) { @@ -2045,13 +2064,38 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::MEDINT: case execplan::CalpontSystemCatalog::INT: case execplan::CalpontSystemCatalog::BIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + if (cc) + { + datum.columnData = cc->getIntVal(const_cast(rowIn), bIsNull); + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + } + break; + } case execplan::CalpontSystemCatalog::DECIMAL: case execplan::CalpontSystemCatalog::UDECIMAL: { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - datum.scale = 
fRowGroupIn.getScale()[colIn]; - datum.precision = fRowGroupIn.getPrecision()[colIn]; + datum.dataType = colDataType; + if (cc) + { + datum.columnData = cc->getDecimalVal(const_cast(rowIn), bIsNull).value; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + } break; } @@ -2062,7 +2106,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::UBIGINT: { datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); + if (cc) + { + datum.columnData = cc->getUintVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } break; } @@ -2070,7 +2121,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::UDOUBLE: { datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; - datum.columnData = rowIn.getDoubleField(colIn); + if (cc) + { + datum.columnData = cc->getDoubleVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getDoubleField(colIn); + } break; } @@ -2078,22 +2136,55 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::UFLOAT: { datum.dataType = execplan::CalpontSystemCatalog::FLOAT; - datum.columnData = rowIn.getFloatField(colIn); + if (cc) + { + datum.columnData = cc->getFloatVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getFloatField(colIn); + } break; } case execplan::CalpontSystemCatalog::DATE: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + if (cc) + { + datum.columnData = cc->getDateIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } + break; + } case 
execplan::CalpontSystemCatalog::DATETIME: { datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); + if (cc) + { + datum.columnData = cc->getDatetimeIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } break; } case execplan::CalpontSystemCatalog::TIME: { datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); + if (cc) + { + datum.columnData = cc->getTimeIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getIntField(colIn); + } break; } @@ -2105,7 +2196,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::BLOB: { datum.dataType = colDataType; - datum.columnData = rowIn.getStringField(colIn); + if (cc) + { + datum.columnData = cc->getStrVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getStringField(colIn); + } break; } @@ -2147,6 +2245,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColsIdx].get()); rowUDAF->bInterrupted = true; throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } @@ -2443,17 +2542,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); - } - else - { - throw logic_error("(5)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } @@ -3991,17 +4080,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); - } - else 
- { - throw logic_error("(6)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } @@ -4199,8 +4278,8 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // colAux(in) - Where the UDAF userdata resides // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance //------------------------------------------------------------------------------ -void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) +void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, + int64_t colAux, uint64_t& funcColsIdx) { static_any::any valOut; @@ -4235,6 +4314,7 @@ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColsIdx].get()); rowUDAF->bInterrupted = true; throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } @@ -4429,17 +4509,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); - } - else - { - throw logic_error("(7)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index 282f354fc..14e4313cf 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -50,6 +50,7 @@ #include "stlpoolallocator.h" #include "returnedcolumn.h" #include "mcsv1_udaf.h" +#include "constantcolumn.h" // To do: move code that depends on joblist to a proper subsystem. namespace joblist @@ -200,6 +201,13 @@ struct RowAggFunctionCol // 4. 
for duplicate - point to the real aggretate column to be copied from // Set only on UM, the fAuxColumnIndex is defaulted to fOutputColumnIndex+1 on PM. uint32_t fAuxColumnIndex; + + // For UDAF that have more than one parameter and some parameters are constant. + // There will be a series of RowAggFunctionCol created, one for each parameter. + // The first will be a RowUDAFFunctionCol. Subsequent ones will be RowAggFunctionCol + // with fAggFunction == ROWAGG_MULTI_PARM. Order is important. + // If this parameter is constant, that value is here. + SRCP fpConstCol; }; @@ -220,8 +228,11 @@ struct RowUDAFFunctionCol : public RowAggFunctionCol inputColIndex, outputColIndex, auxColIndex), bInterrupted(false) {} - RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, - rhs.fInputColumnIndex, rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), fUDAFContext(rhs.fUDAFContext) + RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : + RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, rhs.fInputColumnIndex, + rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), + fUDAFContext(rhs.fUDAFContext), + bInterrupted(false) {} virtual ~RowUDAFFunctionCol() {} @@ -238,6 +249,16 @@ inline void RowAggFunctionCol::serialize(messageqcpp::ByteStream& bs) const bs << (uint8_t)fAggFunction; bs << fInputColumnIndex; bs << fOutputColumnIndex; + if (fpConstCol) + { + bs << (uint8_t)1; + fpConstCol.get()->serialize(bs); + } + else + { + bs << (uint8_t)0; + } + } inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) @@ -245,6 +266,13 @@ inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) bs >> (uint8_t&)fAggFunction; bs >> fInputColumnIndex; bs >> fOutputColumnIndex; + uint8_t t; + bs >> t; + if (t) + { + fpConstCol.reset(new ConstantColumn); + fpConstCol.get()->unserialize(bs); + } } inline void RowUDAFFunctionCol::serialize(messageqcpp::ByteStream& bs) const @@ -586,7 +614,7 @@ protected: virtual void doAvg(const 
Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); - virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); @@ -902,7 +930,7 @@ protected: void doStatistics(const Row&, int64_t, int64_t, int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); - void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); + void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx); bool countSpecial(const RowGroup* pRG) { return false; diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h index da17f5d6b..6a727caf6 100644 --- a/utils/udfsdk/allnull.h +++ b/utils/udfsdk/allnull.h @@ -48,7 +48,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/avg_mode.h b/utils/udfsdk/avg_mode.h index 5722c5fea..fba1fcdcc 100644 --- a/utils/udfsdk/avg_mode.h +++ b/utils/udfsdk/avg_mode.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/avgx.h b/utils/udfsdk/avgx.h index 0569b6091..a830c6803 100644 --- a/utils/udfsdk/avgx.h +++ b/utils/udfsdk/avgx.h @@ -35,7 +35,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h index df3f47649..e09228d77 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -68,7 +68,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h index 142be6ba8..48bd93c70 100644 --- a/utils/udfsdk/median.h +++ 
b/utils/udfsdk/median.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/udfsdk/regr_avgx.cpp index c7cc5b56e..aec4f361f 100644 --- a/utils/udfsdk/regr_avgx.cpp +++ b/utils/udfsdk/regr_avgx.cpp @@ -82,7 +82,7 @@ mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* { return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. } - if (valIn_x.empty() || valIn_y.empty()) + if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. Probably redundant { return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. } @@ -107,10 +107,6 @@ mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* { val = valIn_x.cast(); } - else if (valIn_x.compatible(longTypeId)) - { - val = valIn_x.cast(); - } else if (valIn_x.compatible(llTypeId)) { val = valIn_x.cast(); diff --git a/utils/udfsdk/regr_avgx.h b/utils/udfsdk/regr_avgx.h index f70f30d8c..27b8708f7 100644 --- a/utils/udfsdk/regr_avgx.h +++ b/utils/udfsdk/regr_avgx.h @@ -35,7 +35,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h index 2cac61c2c..e27ecf1fa 100644 --- a/utils/udfsdk/ssq.h +++ b/utils/udfsdk/ssq.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index 3d3ac39ca..fe1f3fd0e 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -238,38 +238,5 @@ N="Makefile" Type="Makefile"/> - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index 5cd5243c5..2876fbf7e 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -451,7 +451,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { mcsv1sdk::mcsv1_UDAF::ReturnCode rc; uint64_t colOut = 
fFieldIndex[0]; - + bool isNull = false; if ((fFrameUnit == WF__FRAME_ROWS) || (fPrev == -1) || (!fPeer->operator()(getPointer(fRowData->at(c)), getPointer(fRowData->at(fPrev))))) @@ -468,13 +468,24 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Put the parameter metadata (type, scale, precision) into valsIn mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + ConstantColumn* cc = NULL; for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) { - uint64_t colIn = fFieldIndex[i+1]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colIn); + cc = static_cast(fConstantParms[i].get()); + if (cc) + { + datum.dataType = cc->resultType().colDataType; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + uint64_t colIn = fFieldIndex[i+1]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } } if (b <= c && c <= e) @@ -494,12 +505,14 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) uint32_t flags[getContext().getParameterCount()]; for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { + cc = static_cast(fConstantParms[k].get()); uint64_t colIn = fFieldIndex[k+1]; mcsv1sdk::ColumnDatum& datum = valsIn[k]; // Turn on Null flags or skip based on respect nulls flags[k] = 0; - if (fRow.isNullValue(colIn) == true) + if ((!cc && fRow.isNullValue(colIn) == true) + || (cc && cc->type() == ConstantColumn::NULLDATA)) { if (!bRespectNulls) { @@ -510,133 +523,196 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - // MCOL-1201 Multi-Paramter calls - switch (datum.dataType) + if (!bHasNull && !(flags[k] & mcsv1sdk::PARAM_IS_NULL)) { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: 
- case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: + switch (datum.dataType) { - int64_t valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + int64_t valIn; + if (cc) { - continue; + valIn = cc->getIntVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - case CalpontSystemCatalog::UDECIMAL: - { - uint64_t valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + int64_t valIn; + if (cc) { - continue; + valIn = cc->getDecimalVal(fRow, isNull).value; } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - { - double valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + uint64_t valIn; + if (cc) { - continue; + valIn = cc->getUintVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - { - float valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + double valIn; + if (cc) { - continue; + valIn = cc->getDoubleVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::VARBINARY: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::BLOB: - { - string valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + float valIn; + if (cc) { - continue; + valIn = cc->getFloatVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - default: - { - string errStr = "(" + colType2String[i] + ")"; - errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); - cerr << errStr << endl; - throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + if (cc) + { + valIn = cc->getStrVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - break; + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[i] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } } } // Skip if any value is NULL and respect nulls is off. diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index f7a4c4b08..fc3f9006d 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -53,8 +53,6 @@ public: // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF -// The template parameter is currently only used to support DISTINCT, as -// as that is done via a set template class WF_udaf : public WindowFunctionType { diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index 4c5b4de32..f5598a7e5 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -39,7 +39,6 @@ using namespace logging; using namespace ordering; #include "calpontsystemcatalog.h" -#include "constantcolumn.h" #include "dataconvert.h" // int64_t IDB_pow[19] using namespace execplan; @@ -228,6 +227,9 @@ WindowFunctionType::makeWindowFunction(const string& name, int ct, WindowFunctio break; } + // Copy the only the constant parameter pointers + af->constParms(wc->functionParms()); + return af; } @@ -634,6 +636,26 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) return v; } +void WindowFunctionType::constParms(const std::vector& functionParms) +{ + // fConstantParms will end up with a copy of functionParms, but only + // the constant types will be copied. 
Other types will take up space but + // be NULL. This allows us to acces the constants without the overhead + // of dynamic_cast for every row. + for (size_t i = 0; i < functionParms.size(); ++i) + { + ConstantColumn* cc = dynamic_cast(functionParms[i].get()); + if (cc) + { + fConstantParms.push_back(functionParms[i]); + } + else + { + fConstantParms.push_back(SRCP(cc)); + } + } +} + } //namespace // vim:ts=4 sw=4: diff --git a/utils/windowfunction/windowfunctiontype.h b/utils/windowfunction/windowfunctiontype.h index 50732d3b5..efa1c548a 100644 --- a/utils/windowfunction/windowfunctiontype.h +++ b/utils/windowfunction/windowfunctiontype.h @@ -31,7 +31,7 @@ #include "returnedcolumn.h" #include "rowgroup.h" #include "windowframe.h" - +#include "constantcolumn.h" namespace ordering { @@ -198,6 +198,8 @@ public: fStep = step; } + void constParms(const std::vector& functionParms); + static boost::shared_ptr makeWindowFunction(const std::string&, int ct, WindowFunctionColumn* wc); protected: @@ -244,6 +246,9 @@ protected: // output and input field indices: [0] - output std::vector fFieldIndex; + // constant function parameters -- needed for udaf with constant + std::vector fConstantParms; + // row meta data rowgroup::RowGroup fRowGroup; rowgroup::Row fRow; From fb5f3240101abbdf8a1cbd2dc8fddc20004a5789 Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 5 Jun 2018 12:53:45 -0500 Subject: [PATCH 10/19] MCOL-1201 Fix DISTINCT with UDAF multi-parm --- dbcon/joblist/tupleaggregatestep.cpp | 134 ++++++++++++++++++--------- utils/rowgroup/rowaggregation.cpp | 6 +- 2 files changed, 95 insertions(+), 45 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 491f86a8f..be0e2009d 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -1140,6 +1140,7 @@ void TupleAggregateStep::prep1PhaseAggregate( // populate the aggregate rowgroup AGG_MAP aggFuncMap; + uint64_t outIdx = 0; for (uint64_t 
i = 0; i < returnedColVec.size(); i++) { @@ -1157,8 +1158,9 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(ti.dtype); widthAgg.push_back(ti.width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - aggOp, stats, 0, i, jobInfo.cntStarPos)); + aggOp, stats, 0, outIdx, jobInfo.cntStarPos)); functionVec.push_back(funct); + ++outIdx; continue; } @@ -1174,9 +1176,10 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(ti.dtype); widthAgg.push_back(width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - aggOp, stats, 0, i, -1)); + aggOp, stats, 0, outIdx, -1)); functionVec.push_back(funct); + ++outIdx; continue; } @@ -1222,16 +1225,17 @@ void TupleAggregateStep::prep1PhaseAggregate( widthAgg.push_back(width[colProj]); if (groupBy[it->second]->fOutputColumnIndex == (uint32_t) - 1) - groupBy[it->second]->fOutputColumnIndex = i; + groupBy[it->second]->fOutputColumnIndex = outIdx; else functionVec.push_back(SP_ROWAGG_FUNC_t( new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, - i, + outIdx, groupBy[it->second]->fOutputColumnIndex))); + ++outIdx; continue; } else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), key) != @@ -1244,6 +1248,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(ti.precision); typeAgg.push_back(ti.dtype); widthAgg.push_back(ti.width); + ++outIdx; continue; } else if (jobInfo.groupConcatInfo.columns().find(key) != @@ -1256,6 +1261,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + ++outIdx; continue; } else if (jobInfo.windowSet.find(key) != jobInfo.windowSet.end()) @@ -1267,6 +1273,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + ++outIdx; continue; } else @@ -1296,7 +1303,7 @@ void 
TupleAggregateStep::prep1PhaseAggregate( { pUDAFFunc = udafc->getContext().getFunction(); // Create a RowAggFunctionCol (UDAF subtype) with the context. - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, outIdx)); break; } } @@ -1307,7 +1314,7 @@ void TupleAggregateStep::prep1PhaseAggregate( } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, outIdx)); } functionVec.push_back(funct); @@ -1536,6 +1543,11 @@ void TupleAggregateStep::prep1PhaseAggregate( { aggFuncMap.insert(make_pair(boost::make_tuple(key, aggOp, pUDAFFunc), funct->fOutputColumnIndex)); } + + if (aggOp != ROWAGG_MULTI_PARM) + { + ++outIdx; + } } // now fix the AVG function, locate the count(column) position @@ -1687,7 +1699,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); // map key = column key, operation (enum), and UDAF pointer if UDAF. 
AGG_MAP aggFuncMap; - set avgSet; +// set avgSet; + list multiParmIndexes; // fOR udaf UDAFColumn* udafc = NULL; @@ -1842,9 +1855,9 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // skip sum / count(column) if avg is also selected - if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && - (avgSet.find(aggKey) != avgSet.end())) - continue; +// if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && +// (avgSet.find(aggKey) != avgSet.end())) +// continue; if (aggOp == ROWAGG_DISTINCT_SUM || aggOp == ROWAGG_DISTINCT_AVG || @@ -2080,7 +2093,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); ++colAgg; - // UDAF Dummy holder for UserData struct + // Column for index of UDAF UserData struct oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(aggKey); scaleAgg.push_back(0); @@ -2107,6 +2120,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(widthProj[colProj]); + multiParmIndexes.push_back(colAgg); ++colAgg; // If the param is const ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); @@ -2154,7 +2168,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. 
AGG_MAP aggDupFuncMap; - pUDAFFunc = NULL; + projColsUDAFIdx = 0; + int64_t multiParms = 0; // copy over the groupby vector // update the outputColumnIndex if returned @@ -2165,8 +2180,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - projColsUDAFIdx = 0; // locate the return column position in aggregated rowgroup + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { udafc = NULL; @@ -2176,23 +2191,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colAgg = -1; - if (aggOp == ROWAGG_UDAF) + if (aggOp == ROWAGG_MULTI_PARM) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; - for (; it != jobInfo.projectionCols.end(); it++) - { - udafc = dynamic_cast((*it).get()); - projColsUDAFIdx++; - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - break; - } - } - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); - } + // Skip on final agg.: Extra parms for an aggregate have no work there. 
+ ++multiParms; + continue; } if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -2220,6 +2223,25 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } } + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } + } + switch (aggOp) { case ROWAGG_DISTINCT_AVG: @@ -2470,7 +2492,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep1PhaseDistinctAggregate: " << emsg << " oid=" @@ -2494,7 +2516,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -2504,7 +2526,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (dupGroupbyIndex != -1) functionVec2.push_back(SP_ROWAGG_FUNC_t( new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -2512,11 +2534,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, outIdx)); 
} else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -2553,6 +2575,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -2570,7 +2593,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -2785,6 +2808,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. 
+ int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -2792,6 +2820,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[i].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } if (returnedColVec[k].first != distinctColKey) continue; @@ -2812,7 +2845,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -2831,9 +2864,15 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } // search non-distinct functions in functionVec vector::iterator it = functionVec2.begin(); @@ -2849,7 +2888,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if ((f->fOutputColumnIndex == k) && @@ -2871,7 +2910,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -4272,9 +4311,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fOutputColumnIndex, + udafFuncCol->fOutputColumnIndex-multiParms, udafFuncCol->fAuxColumnIndex-multiParms)); 
functionNoDistVec.push_back(funct); + pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); } else { @@ -4282,9 +4322,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fAggFunction, funcPm->fStatsFunction, funcPm->fOutputColumnIndex, - funcPm->fOutputColumnIndex, + funcPm->fOutputColumnIndex-multiParms, funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); + pUDAFFunc = NULL; } } @@ -4500,6 +4541,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( precisionAggDist.push_back(precisionAggUm[colUm]); typeAggDist.push_back(typeAggUm[colUm]); widthAggDist.push_back(widthAggUm[colUm]); + colUm -= multiParms; } // not a direct hit -- a returned column is not already in the RG from PMs @@ -4536,8 +4578,16 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(retKey); scaleAggDist.push_back(0); - precisionAggDist.push_back(19); - typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + if (isUnsigned(typeAggUm[colUm])) + { + precisionAggDist.push_back(20); + typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + } + else + { + precisionAggDist.push_back(19); + typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + } widthAggDist.push_back(bigIntWidth); } } diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 6339554f1..bead74aff 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -4284,14 +4284,14 @@ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, static_any::any valOut; // Get the user data - boost::shared_ptr userData = rowIn.getUserData(colIn + 1); + boost::shared_ptr userDataIn = rowIn.getUserData(colIn+1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. 
uint32_t flags[1]; flags[0] = 0; - if (!userData) + if (!userDataIn) { if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { @@ -4309,7 +4309,7 @@ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, // Call the UDAF subEvaluate method mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userData.get()); + rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userDataIn.get()); fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) From 6fa7dded6fb9c9ebfc50a244c664d9246c0b8578 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:50:10 -0500 Subject: [PATCH 11/19] MCOL-1201 manual rebase with develop. Obsoletes branch MCOL-1201 --- dbcon/execplan/aggregatecolumn.cpp | 96 +-- dbcon/execplan/aggregatecolumn.h | 44 +- dbcon/joblist/expressionstep.cpp | 12 +- dbcon/joblist/expressionstep.h | 1 + dbcon/joblist/groupconcat.cpp | 2 +- dbcon/joblist/joblistfactory.cpp | 531 ++++++++---- dbcon/joblist/tupleaggregatestep.cpp | 280 +++++-- dbcon/mysql/ha_calpont_execplan.cpp | 858 +++++++++++--------- dbcon/mysql/ha_calpont_impl.cpp | 9 +- dbcon/mysql/ha_window_function.cpp | 37 +- utils/common/any.hpp | 270 +++--- utils/rowgroup/rowaggregation.cpp | 605 +++++++++----- utils/rowgroup/rowaggregation.h | 29 +- utils/udfsdk/CMakeLists.txt | 2 +- utils/udfsdk/allnull.cpp | 7 +- utils/udfsdk/allnull.h | 4 +- utils/udfsdk/avg_mode.cpp | 14 +- utils/udfsdk/avg_mode.h | 14 +- utils/udfsdk/mcsv1_udaf.cpp | 13 +- utils/udfsdk/mcsv1_udaf.h | 88 +- utils/udfsdk/median.cpp | 14 +- utils/udfsdk/median.h | 8 +- utils/udfsdk/ssq.cpp | 14 +- utils/udfsdk/ssq.h | 8 +- utils/udfsdk/udfmysql.cpp | 162 ++++ utils/udfsdk/udfsdk.vpj | 4 + utils/windowfunction/wf_udaf.cpp | 280 +++++-- utils/windowfunction/wf_udaf.h | 27 +- utils/windowfunction/windowfunctiontype.cpp | 8 +- writeengine/wrapper/writeengine.cpp | 10 +- 30 files changed, 2255 insertions(+), 1196 deletions(-) diff --git 
a/dbcon/execplan/aggregatecolumn.cpp b/dbcon/execplan/aggregatecolumn.cpp index 18cba2607..5bce12d79 100644 --- a/dbcon/execplan/aggregatecolumn.cpp +++ b/dbcon/execplan/aggregatecolumn.cpp @@ -98,36 +98,6 @@ AggregateColumn::AggregateColumn(const uint32_t sessionID): { } -AggregateColumn::AggregateColumn(const AggOp aggOp, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - -AggregateColumn::AggregateColumn(const AggOp aggOp, const string& content, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + content + ")") -{ - // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); -} - -// deprecated constructor. use function name as string -AggregateColumn::AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fFunctionName(functionName), - fAggOp(NOOP), - fAsc(false), - fData(functionName + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - // deprecated constructor. 
use function name as string AggregateColumn::AggregateColumn(const string& functionName, const string& content, const uint32_t sessionID): ReturnedColumn(sessionID), @@ -137,20 +107,21 @@ AggregateColumn::AggregateColumn(const string& functionName, const string& conte fData(functionName + "(" + content + ")") { // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); + SRCP srcp(new ArithmeticColumn(content)); + fAggParms.push_back(srcp); } AggregateColumn::AggregateColumn( const AggregateColumn& rhs, const uint32_t sessionID ): ReturnedColumn(rhs, sessionID), fFunctionName (rhs.fFunctionName), fAggOp(rhs.fAggOp), - fFunctionParms(rhs.fFunctionParms), fTableAlias(rhs.tableAlias()), fAsc(rhs.asc()), fData(rhs.data()), fConstCol(rhs.fConstCol) { fAlias = rhs.alias(); + fAggParms = rhs.fAggParms; } /** @@ -166,10 +137,14 @@ const string AggregateColumn::toString() const if (fAlias.length() > 0) output << "/Alias: " << fAlias << endl; - if (fFunctionParms == 0) - output << "No arguments" << endl; + if (fAggParms.size() == 0) + output << "No arguments"; else - output << *fFunctionParms << endl; + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + output << *(fAggParms[i]) << " "; + } + output << endl; if (fConstCol) output << *fConstCol; @@ -191,10 +166,11 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const b << fFunctionName; b << static_cast(fAggOp); - if (fFunctionParms == 0) - b << (uint8_t) ObjectReader::NULL_CLASS; - else - fFunctionParms->serialize(b); + b << static_cast(fAggParms.size()); + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + fAggParms[i]->serialize(b); + } b << static_cast(fGroupByColList.size()); @@ -219,20 +195,26 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const void AggregateColumn::unserialize(messageqcpp::ByteStream& b) { - ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); - fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); - 
fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); - ReturnedColumn::unserialize(b); - b >> fFunctionName; - b >> fAggOp; - //delete fFunctionParms; - fFunctionParms.reset( - dynamic_cast(ObjectReader::createTreeNode(b))); - messageqcpp::ByteStream::quadbyte size; messageqcpp::ByteStream::quadbyte i; ReturnedColumn* rc; + ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); + fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); + fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); + fAggParms.erase(fAggParms.begin(), fAggParms.end()); + ReturnedColumn::unserialize(b); + b >> fFunctionName; + b >> fAggOp; + + b >> size; + for (i = 0; i < size; i++) + { + rc = dynamic_cast(ObjectReader::createTreeNode(b)); + SRCP srcp(rc); + fAggParms.push_back(srcp); + } + b >> size; for (i = 0; i < size; i++) @@ -261,6 +243,7 @@ void AggregateColumn::unserialize(messageqcpp::ByteStream& b) bool AggregateColumn::operator==(const AggregateColumn& t) const { const ReturnedColumn* rc1, *rc2; + AggParms::const_iterator it, it2; rc1 = static_cast(this); rc2 = static_cast(&t); @@ -277,16 +260,18 @@ bool AggregateColumn::operator==(const AggregateColumn& t) const if (fAggOp != t.fAggOp) return false; - if (fFunctionParms.get() != NULL && t.fFunctionParms.get() != NULL) + if (aggParms().size() != t.aggParms().size()) { - if (*fFunctionParms.get() != t.fFunctionParms.get()) + return false; + } + for (it = fAggParms.begin(), it2 = t.fAggParms.begin(); + it != fAggParms.end(); + ++it, ++it2) + { + if (**it != **it2) return false; } - else if (fFunctionParms.get() != NULL || t.fFunctionParms.get() != NULL) - return false; - //if (fAlias != t.fAlias) - // return false; if (fTableAlias != t.fTableAlias) return false; @@ -645,3 +630,4 @@ AggregateColumn::AggOp AggregateColumn::agname2num(const string& agname) } } // namespace execplan + diff --git a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h index 
d1db7e5a4..b0884f179 100644 --- a/dbcon/execplan/aggregatecolumn.h +++ b/dbcon/execplan/aggregatecolumn.h @@ -40,6 +40,8 @@ class ByteStream; namespace execplan { +typedef std::vector AggParms; + /** * @brief A class to represent a aggregate return column * @@ -74,7 +76,8 @@ public: BIT_OR, BIT_XOR, GROUP_CONCAT, - UDAF + UDAF, + MULTI_PARM }; /** @@ -94,21 +97,6 @@ public: */ AggregateColumn(const uint32_t sessionID); - /** - * ctor - */ - AggregateColumn(const AggOp aggop, ReturnedColumn* parm, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const AggOp aggop, const std::string& content, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID = 0); - /** * ctor */ @@ -155,24 +143,27 @@ public: fAggOp = aggOp; } + /** get function parms - * - * set the function parms from this object */ - virtual const SRCP functionParms() const + virtual AggParms& aggParms() { - return fFunctionParms; + return fAggParms; + } + + virtual const AggParms& aggParms() const + { + return fAggParms; } /** set function parms - * - * set the function parms for this object */ - virtual void functionParms(const SRCP& functionParms) + virtual void aggParms(const AggParms& parms) { - fFunctionParms = functionParms; + fAggParms = parms; } + /** return a copy of this pointer * * deep copy of this pointer and return the copy @@ -325,9 +316,10 @@ protected: uint8_t fAggOp; /** - * A ReturnedColumn objects that are the arguments to this function + * ReturnedColumn objects that are the arguments to this + * function */ - SRCP fFunctionParms; + AggParms fAggParms; /** table alias * A string to represent table alias name which contains this column diff --git a/dbcon/joblist/expressionstep.cpp b/dbcon/joblist/expressionstep.cpp index 0e064c359..4a8a14ff3 100644 --- a/dbcon/joblist/expressionstep.cpp +++ b/dbcon/joblist/expressionstep.cpp @@ -56,6 +56,17 @@ using namespace rowgroup; 
namespace joblist { +ExpressionStep::ExpressionStep() : + fExpressionFilter(NULL), + fExpressionId(-1), + fVarBinOK(false), + fSelectFilter(false), + fAssociatedJoinId(0), + fDoJoin(false), + fVirtual(false) +{ +} + ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : JobStep(jobInfo), fExpressionFilter(NULL), @@ -68,7 +79,6 @@ ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : { } - ExpressionStep::ExpressionStep(const ExpressionStep& rhs) : JobStep(rhs), fExpression(rhs.expression()), diff --git a/dbcon/joblist/expressionstep.h b/dbcon/joblist/expressionstep.h index 4a069440f..63423fc7d 100644 --- a/dbcon/joblist/expressionstep.h +++ b/dbcon/joblist/expressionstep.h @@ -50,6 +50,7 @@ class ExpressionStep : public JobStep { public: // constructors + ExpressionStep(); ExpressionStep(const JobInfo&); // destructor constructors virtual ~ExpressionStep(); diff --git a/dbcon/joblist/groupconcat.cpp b/dbcon/joblist/groupconcat.cpp index 234fc0a8e..afc91a2ec 100644 --- a/dbcon/joblist/groupconcat.cpp +++ b/dbcon/joblist/groupconcat.cpp @@ -78,7 +78,7 @@ void GroupConcatInfo::prepGroupConcat(JobInfo& jobInfo) while (i != jobInfo.groupConcatCols.end()) { GroupConcatColumn* gcc = dynamic_cast(i->get()); - const RowColumn* rcp = dynamic_cast(gcc->functionParms().get()); + const RowColumn* rcp = dynamic_cast(gcc->aggParms()[0].get()); SP_GroupConcat groupConcat(new GroupConcat); groupConcat->fSeparator = gcc->separator(); diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index a48ecd13a..4cf7bccc5 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -18,7 +18,6 @@ // $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $ - #include #include #include @@ -870,7 +869,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo if (gcc != NULL) { - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rcp = dynamic_cast(srcp.get()); const vector& cols = 
rcp->columnVec(); @@ -891,21 +890,55 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } +#if 0 + // MCOL-1201 Add support for multi-parameter UDAnF + UDAFColumn* udafc = dynamic_cast(retCols[i].get()); + if (udafc != NULL) + { + srcp = udafc->aggParms()[0]; + const RowColumn* rcp = dynamic_cast(srcp.get()); + const vector& cols = rcp->columnVec(); + for (vector::const_iterator j = cols.begin(); j != cols.end(); j++) + { + srcp = *j; + if (dynamic_cast(srcp.get()) == NULL) + retCols.push_back(srcp); + + // Do we need this? + const ArithmeticColumn* ac = dynamic_cast(srcp.get()); + const FunctionColumn* fc = dynamic_cast(srcp.get()); + if (ac != NULL || fc != NULL) + { + // bug 3728, make a dummy expression step for each expression. + scoped_ptr es(new ExpressionStep(jobInfo)); + es->expression(srcp, jobInfo); + } + } + continue; + } +#endif srcp = retCols[i]; const AggregateColumn* ag = dynamic_cast(retCols[i].get()); - - if (ag != NULL) - srcp = ag->functionParms(); - - const ArithmeticColumn* ac = dynamic_cast(srcp.get()); - const FunctionColumn* fc = dynamic_cast(srcp.get()); - - if (ac != NULL || fc != NULL) + // bug 3728 Make a dummy expression for srcp if it is an + // expression. This is needed to fill in some stuff. + // Note that es.expression does nothing if the item is not an expression. + if (ag == NULL) { - // bug 3728, make a dummy expression step for each expression. - scoped_ptr es(new ExpressionStep(jobInfo)); - es->expression(srcp, jobInfo); + // Not an aggregate. Make a dummy expression for the item + ExpressionStep es; + es.expression(srcp, jobInfo); + } + else + { + // MCOL-1201 multi-argument aggregate. make a dummy expression + // step for each argument that is an expression. 
+ for (uint32_t i = 0; i < ag->aggParms().size(); ++i) + { + srcp = ag->aggParms()[i]; + ExpressionStep es; + es.expression(srcp, jobInfo); + } } } @@ -915,17 +948,18 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo { srcp = retCols[i]; const SimpleColumn* sc = dynamic_cast(srcp.get()); + AggregateColumn* aggc = dynamic_cast(srcp.get()); bool doDistinct = (csep->distinct() && csep->groupByCols().empty()); uint32_t tupleKey = -1; string alias; string view; - // returned column could be groupby column, a simplecoulumn not a agregatecolumn + // returned column could be groupby column, a simplecoulumn not an aggregatecolumn int op = 0; CalpontSystemCatalog::OID dictOid = 0; CalpontSystemCatalog::ColType ct, aggCt; - if (sc == NULL) + if (aggc) { GroupConcatColumn* gcc = dynamic_cast(retCols[i].get()); @@ -939,7 +973,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo tupleKey = ti.key; jobInfo.returnedColVec.push_back(make_pair(tupleKey, gcc->aggOp())); // not a tokenOnly column. 
Mark all the columns involved - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rowCol = dynamic_cast(srcp.get()); if (rowCol) @@ -963,186 +997,353 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } - - AggregateColumn* ac = dynamic_cast(retCols[i].get()); - - if (ac != NULL) + else { - srcp = ac->functionParms(); - sc = dynamic_cast(srcp.get()); + // Aggregate column not group concat + AggParms& aggParms = aggc->aggParms(); - if (ac->constCol().get() != NULL) + for (uint32_t parm = 0; parm < aggParms.size(); ++parm) { - // replace the aggregate on constant with a count(*) - SRCP clone; - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) + if (aggc->constCol().get() != NULL) { - clone.reset(new UDAFColumn(*udafc, ac->sessionID())); + // replace the aggregate on constant with a count(*) + SRCP clone; + UDAFColumn* udafc = dynamic_cast(aggc); + + if (udafc) + { + clone.reset(new UDAFColumn(*udafc, aggc->sessionID())); + } + else + { + clone.reset(new AggregateColumn(*aggc, aggc->sessionID())); + } + + jobInfo.constAggregate.insert(make_pair(i, clone)); + aggc->aggOp(AggregateColumn::COUNT_ASTERISK); + aggc->distinct(false); + } + + srcp = aggParms[parm]; + sc = dynamic_cast(srcp.get()); + if (parm == 0) + { + op = aggc->aggOp(); } else { - clone.reset(new AggregateColumn(*ac, ac->sessionID())); + op = AggregateColumn::MULTI_PARM; + } + doDistinct = aggc->distinct(); + if (aggParms.size() == 1) + { + // Set the col type based on the single parm. + // Changing col type based on a parm if multiple parms + // doesn't really make sense. + updateAggregateColType(aggc, srcp, op, jobInfo); + } + aggCt = aggc->resultType(); + + // As of bug3695, make sure varbinary is not used in aggregation. 
+ // TODO: allow for UDAF + if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) + throw runtime_error ("VARBINARY in aggregate function is not supported."); + + // Project the parm columns or expressions + if (sc != NULL) + { + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. + if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. 
+ else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } + } + else + { + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - jobInfo.constAggregate.insert(make_pair(i, clone)); - ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ac->distinct(false); - } + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - op = ac->aggOp(); - doDistinct = ac->distinct(); - updateAggregateColType(ac, srcp, op, jobInfo); - aggCt = ac->resultType(); + if (keyIt == projectKeys.end()) + { + 
RetColsVector::iterator it = pcv.end(); - // As of bug3695, make sure varbinary is not used in aggregation. - if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) - throw runtime_error ("VARBINARY in aggregate function is not supported."); - } - } + if (doDistinct) + it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // simple column selected or aggregated - if (sc != NULL) - { - // one column only need project once - CalpontSystemCatalog::OID retOid = sc->oid(); - CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); - alias = extractTableAlias(sc); - view = sc->viewName(); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); - if (!sc->schemaName().empty()) - { - ct = sc->colType(); + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } -//XXX use this before connector sets colType in sc correctly. 
- if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) - ct = jobInfo.csc->colType(sc->oid()); + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; -//X - dictOid = isDictCol(ct); - } - else - { - retOid = (tblOid + 1) + sc->colPosition(); - ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; - } + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); - TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); - tupleKey = ti.key; + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; - // this is a string column - if (dictOid > 0) - { - map::iterator findit = jobInfo.tokenOnly.find(tupleKey); - - // if the column has never seen, and the op is count: possible need count only. - if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) - { - if (findit == jobInfo.tokenOnly.end()) - jobInfo.tokenOnly[tupleKey] = true; - } - // if aggregate other than count, token is not enough. 
- else if (op != 0 || doDistinct) - { - jobInfo.tokenOnly[tupleKey] = false; - } - - findit = jobInfo.tokenOnly.find(tupleKey); - - if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) - { - dictMap[tupleKey] = dictOid; - jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; - ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); - jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } } else { - const ArithmeticColumn* ac = NULL; - const FunctionColumn* fc = NULL; - const WindowFunctionColumn* wc = NULL; - bool hasAggCols = false; - - if ((ac = dynamic_cast(srcp.get())) != NULL) + // Not an Aggregate + // simple column selected + if (sc != NULL) { - if (ac->aggColumnList().size() > 0) - hasAggCols = true; + // one column only need project once + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. 
+ if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. + else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } } - else if ((fc = dynamic_cast(srcp.get())) != NULL) - { - if (fc->aggColumnList().size() > 0) - hasAggCols = true; - } - else if (dynamic_cast(srcp.get()) != NULL) - { - std::ostringstream errmsg; - errmsg << "Invalid aggregate function nesting."; - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - else if ((wc = dynamic_cast(srcp.get())) == NULL) - { - std::ostringstream errmsg; - errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - - uint64_t eid = srcp.get()->expressionId(); - ct = srcp.get()->resultType(); - TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); - tupleKey = ti.key; - - if (hasAggCols) - jobInfo.expressionVec.push_back(tupleKey); - } - - // add to project list - vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - - if (keyIt == projectKeys.end()) - { - RetColsVector::iterator it = pcv.end(); - - if (doDistinct) - it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); else - it = pcv.insert(pcv.end(), srcp); - - projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); - } - else if (doDistinct) // @bug4250, move forward distinct column if necessary. 
- { - uint32_t pos = distance(projectKeys.begin(), keyIt); - - if (pos >= lastGroupByPos) { - pcv[pos] = pcv[lastGroupByPos]; - pcv[lastGroupByPos] = srcp; - projectKeys[pos] = projectKeys[lastGroupByPos]; - projectKeys[lastGroupByPos] = tupleKey; - lastGroupByPos++; + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - } - if (doDistinct && dictOid > 0) - tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - // remember the columns to be returned - jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + if (keyIt == projectKeys.end()) + { + RetColsVector::iterator it = pcv.end(); - if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) - jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + if (doDistinct) + it = pcv.insert(pcv.begin() + 
lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // bug 1499 distinct processing, save unique distinct columns - if (doDistinct && - (jobInfo.distinctColVec.end() == - find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) - { - jobInfo.distinctColVec.push_back(tupleKey); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); + + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } + + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 9e23ac17b..ff490da5b 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -164,6 +164,9 @@ inline RowAggFunctionType functionIdMap(int planFuncId) case AggregateColumn::UDAF: return ROWAGG_UDAF; + case AggregateColumn::MULTI_PARM: + return ROWAGG_MULTI_PARM; + default: return ROWAGG_FUNCT_UNDEFINE; } @@ -1302,7 +1305,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no/not enough 
UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -1468,7 +1471,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); @@ -1483,6 +1486,17 @@ void TupleAggregateStep::prep1PhaseAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(key); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(width[colProj]); + } + break; + default: { ostringstream emsg; @@ -1560,7 +1574,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(3)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec[i]->fAuxColumnIndex = lastCol++; @@ -1675,7 +1689,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation map projColPosMap; @@ -1848,7 +1862,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); 
} } else @@ -2043,7 +2057,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -2065,6 +2079,18 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(aggKey); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(widthProj[colProj]); + ++colAgg; + } + break; + default: { ostringstream emsg; @@ -2111,7 +2137,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( groupByNoDist.push_back(groupby); aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - + + projColsUDAFIndex = 0; // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2121,6 +2148,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colAgg = -1; + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -2432,11 +2467,37 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + // update the aggregate function vector + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = 
jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + } if (aggOp == ROWAGG_COUNT_NO_OP) funct->fAuxColumnIndex = colAgg; @@ -2549,7 +2610,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep1PhaseDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec2[i]->fAuxColumnIndex = lastCol++; @@ -2893,7 +2954,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3036,12 +3097,11 @@ void TupleAggregateStep::prep2PhasesAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); break; } - } if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -3240,7 +3300,7 @@ void 
TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } oidsAggPm.push_back(oidsProj[colProj]); @@ -3261,6 +3321,18 @@ void TupleAggregateStep::prep2PhasesAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -3278,11 +3350,16 @@ void TupleAggregateStep::prep2PhasesAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap; AGG_MAP aggDupFuncMap; + projColsUDAFIndex = 0; // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3299,7 +3376,14 @@ void TupleAggregateStep::prep2PhasesAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colPm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } // Is this a UDAF? use the function as part of the key. 
+ mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; if (aggOp == ROWAGG_UDAF) @@ -3452,20 +3536,36 @@ void TupleAggregateStep::prep2PhasesAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3517,7 +3617,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3545,7 +3645,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -3691,6 +3791,7 
@@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector groupByPm, groupByUm, groupByNoDist; vector functionVecPm, functionNoDistVec, functionVecUm; + list multiParmIndexes; uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; @@ -3702,7 +3803,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3856,7 +3957,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -4050,7 +4151,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -4072,6 +4173,19 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + multiParmIndexes.push_back(colAggPm); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -4093,12 +4207,23 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( groupByUm.push_back(groupby); } + // Keep a count of the parms 
after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) + { SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; // UDAF support + if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) + { + // Multi-Parm is not used on the UM + ++multiParms; + continue; + } if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); @@ -4106,7 +4231,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } else @@ -4116,18 +4241,25 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fStatsFunction, funcPm->fOutputColumnIndex, funcPm->fOutputColumnIndex, - funcPm->fAuxColumnIndex)); + funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } } - posAggUm = posAggPm; - oidsAggUm = oidsAggPm; - keysAggUm = keysAggPm; - scaleAggUm = scaleAggPm; - precisionAggUm = precisionAggPm; - widthAggUm = widthAggPm; - typeAggUm = typeAggPm; + // Copy over the PM arrays to the UM. Skip any that are a multi-parm entry. 
+ for (uint32_t idx = 0; idx < oidsAggPm.size(); ++idx) + { + if (find (multiParmIndexes.begin(), multiParmIndexes.end(), idx ) != multiParmIndexes.end()) + { + continue; + } + oidsAggUm.push_back(oidsAggPm[idx]); + keysAggUm.push_back(keysAggPm[idx]); + scaleAggUm.push_back(scaleAggPm[idx]); + precisionAggUm.push_back(precisionAggPm[idx]); + widthAggUm.push_back(widthAggPm[idx]); + typeAggUm.push_back(typeAggPm[idx]); + } } @@ -4137,6 +4269,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4159,6 +4295,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colUm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } + + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -4285,7 +4436,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second; + colUm = it->second - multiParms; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); @@ -4309,7 +4460,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - 
colUm = it->second; + colUm = it->second - multiParms; if (aggOp == ROWAGG_SUM) { @@ -4412,21 +4563,36 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - pUDAFFunc = udafc->getContext().getFunction(); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4480,7 +4646,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4540,7 +4706,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called 
but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -4687,6 +4853,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -4694,6 +4865,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) /* index with the loop variable k, like the checks that follow; multi-parm entries are counted and skipped */ + { + ++multiParms; + continue; + } if (returnedColVec[k].first != distinctColKey) continue; @@ -4715,7 +4891,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -4732,9 +4908,15 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } // search non-distinct functions in functionVec vector::iterator it = functionVecUm.begin(); @@ -4752,7 +4934,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK || @@ -4773,7 +4955,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( 
f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 271508f42..9150d5393 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4038,6 +4038,10 @@ ParseTree* buildParseTree(Item_func* item, gp_walk_info& gwi, bool& nonSupport) ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { + // MCOL-1201 For UDAnF multiple parameters + vector selCols; + vector orderCols; + if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -4054,6 +4058,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // N.B. argument_count() is the # of formal parms to the agg fcn. InifniDB only supports 1 argument // TODO: Support more than one parm +#if 0 if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC && isp->sum_func() != Item_sum::UDF_SUM_FUNC) { @@ -4061,7 +4066,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG); return NULL; } - +#endif AggregateColumn* ac = NULL; if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) @@ -4084,444 +4089,509 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { gwi.fatalParseError = true; gwi.parseErrorText = "Non supported aggregate type on the select clause"; + if (ac) + delete ac; return NULL; } - // special parsing for group_concat - if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + try { - Item_func_group_concat* gc = (Item_func_group_concat*)isp; + + // special parsing for group_concat + if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + Item_func_group_concat* gc = (Item_func_group_concat*)isp; vector orderCols; - RowColumn* rowCol = new RowColumn(); + 
RowColumn* rowCol = new RowColumn(); vector selCols; - uint32_t select_ctn = gc->count_field(); - ReturnedColumn* rc = NULL; + uint32_t select_ctn = gc->count_field(); + ReturnedColumn* rc = NULL; - for (uint32_t i = 0; i < select_ctn; i++) - { - rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); - - if (!rc || gwi.fatalParseError) - return NULL; - - selCols.push_back(SRCP(rc)); - } - - ORDER** order_item, **end; - - for (order_item = gc->get_order(), - end = order_item + gc->order_field(); order_item < end; - order_item++) - { - Item* ord_col = *(*order_item)->item; - - if (ord_col->type() == Item::INT_ITEM) + for (uint32_t i = 0; i < select_ctn; i++) { - Item_int* id = (Item_int*)ord_col; - - if (id->val_int() > (int)selCols.size()) - { - gwi.fatalParseError = true; - return NULL; - } - - rc = selCols[id->val_int() - 1]->clone(); - rc->orderPos(id->val_int() - 1); - } - else - { - rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); + rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); if (!rc || gwi.fatalParseError) { + if (ac) + delete ac; return NULL; } + + selCols.push_back(SRCP(rc)); } - // 10.2 TODO: direction is now a tri-state flag - rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? 
true : false); - orderCols.push_back(SRCP(rc)); - } + ORDER** order_item, **end; - rowCol->columnVec(selCols); - (dynamic_cast(ac))->orderCols(orderCols); - parm.reset(rowCol); - - if (gc->str_separator()) - { - string separator; - separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); - (dynamic_cast(ac))->separator(separator); - } - } - else - { - for (uint32_t i = 0; i < isp->argument_count(); i++) - { - Item* sfitemp = sfitempp[i]; - Item::Type sfitype = sfitemp->type(); - - switch (sfitype) + for (order_item = gc->get_order(), + end = order_item + gc->order_field(); order_item < end; + order_item++) { - case Item::FIELD_ITEM: - { - Item_field* ifp = reinterpret_cast(sfitemp); - SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + Item* ord_col = *(*order_item)->item; - if (!sc) + if (ord_col->type() == Item::INT_ITEM) + { + Item_int* id = (Item_int*)ord_col; + + if (id->val_int() > (int)selCols.size()) { gwi.fatalParseError = true; - break; + if (ac) + delete ac; + return NULL; } - parm.reset(sc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); - TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); - gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); - break; + rc = selCols[id->val_int() - 1]->clone(); + rc->orderPos(id->val_int() - 1); } - - case Item::INT_ITEM: - case Item::STRING_ITEM: - case Item::REAL_ITEM: - case Item::DECIMAL_ITEM: + else { - // treat as count(*) - if (ac->aggOp() == AggregateColumn::COUNT) - ac->aggOp(AggregateColumn::COUNT_ASTERISK); + rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::NULL_ITEM: - { - //ac->aggOp(AggregateColumn::COUNT); - parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); - //ac->functionParms(parm); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::FUNC_ITEM: - { - Item_func* ifp = (Item_func*)sfitemp; - ReturnedColumn* rc = 0; - - // check count(1+1) case - vector tmpVec; - uint16_t parseInfo = 0; - parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); - - if (parseInfo & SUB_BIT) + if (!rc || gwi.fatalParseError) { - gwi.fatalParseError = true; - break; - } - else if (!gwi.fatalParseError && - !(parseInfo & AGG_BIT) && - !(parseInfo & AF_BIT) && - tmpVec.size() == 0) - { - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - FunctionColumn* fc = dynamic_cast(rc); - - if ((fc && fc->functionParms().empty()) || !fc) - { - //ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (dynamic_cast(rc)) - { - //@bug5229. handle constant function on aggregate argument - ac->constCol(SRCP(rc)); - break; - } - } - } - - // MySQL carelessly allows correlated aggregate function on the WHERE clause. - // Here is the work around to deal with that inconsistence. 
- // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; - ClauseType clauseType = gwi.clauseType; - - if (gwi.clauseType == WHERE) - gwi.clauseType = HAVING; - - // @bug 3603. for cases like max(rand()). try to build function first. - if (!rc) - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - - parm.reset(rc); - gwi.clauseType = clauseType; - - if (gwi.fatalParseError) - break; - - //ac->functionParms(parm); - break; - } - - case Item::REF_ITEM: - { - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (rc) - { - parm.reset(rc); - //ac->functionParms(parm); - break; + if (ac) + delete ac; + return NULL; } } - default: - { - gwi.fatalParseError = true; - //gwi.parseErrorText = "Non-supported Item in Aggregate function"; - } + // 10.2 TODO: direction is now a tri-state flag + rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); + orderCols.push_back(SRCP(rc)); } - if (gwi.fatalParseError) + rowCol->columnVec(selCols); + (dynamic_cast(ac))->orderCols(orderCols); + parm.reset(rowCol); + + if (gc->str_separator()) { - if (gwi.parseErrorText.empty()) - { - Message::Args args; - - if (item->name) - args.add(item->name); - else - args.add(""); - - gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); - } - - return NULL; + string separator; + separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); + (dynamic_cast(ac))->separator(separator); } } - } - - if (parm) - { - ac->functionParms(parm); - - if (isp->sum_func() == Item_sum::AVG_FUNC || - isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: 
- case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::DECIMAL; - ct.colWidth = 8; - ct.scale += 4; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; -#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::COUNT_FUNC || - isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = parm->resultType().scale; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_FUNC || - isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - ct.colDataType = CalpontSystemCatalog::BIGINT; - - // no break, let fall through - - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - ct.colWidth = 8; - break; - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::UBIGINT; - ct.colWidth = 8; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; 
-#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::STD_FUNC || - isp->sum_func() == Item_sum::VARIANCE_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - ct.scale = 0; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = 0; - ct.precision = -16; // borrowed to indicate skip null value check on connector - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - //Item_func_group_concat* gc = (Item_func_group_concat*)isp; - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::VARCHAR; - ct.colWidth = isp->max_length; - ct.precision = 0; - ac->resultType(ct); - } else { - ac->resultType(parm->resultType()); + for (uint32_t i = 0; i < isp->argument_count(); i++) + { + Item* sfitemp = sfitempp[i]; + Item::Type sfitype = sfitemp->type(); + + switch (sfitype) + { + case Item::FIELD_ITEM: + { + Item_field* ifp = reinterpret_cast(sfitemp); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + + if (!sc) + { + gwi.fatalParseError = true; + break; + } + + parm.reset(sc); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); + TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); + gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); + break; + } + + case Item::INT_ITEM: + case Item::STRING_ITEM: + case Item::REAL_ITEM: + case Item::DECIMAL_ITEM: + { + // treat as count(*) + if (ac->aggOp() == AggregateColumn::COUNT) + ac->aggOp(AggregateColumn::COUNT_ASTERISK); + + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::NULL_ITEM: + { + parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::FUNC_ITEM: + { + Item_func* ifp = (Item_func*)sfitemp; + ReturnedColumn* rc = 0; + + // check count(1+1) case + vector tmpVec; + uint16_t parseInfo = 0; + parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); + + if (parseInfo & SUB_BIT) + { + gwi.fatalParseError = true; + break; + } + else if (!gwi.fatalParseError && + !(parseInfo & AGG_BIT) && + !(parseInfo & AF_BIT) && + tmpVec.size() == 0) + { + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + FunctionColumn* fc = dynamic_cast(rc); + + if ((fc && fc->functionParms().empty()) || !fc) + { + //ac->aggOp(AggregateColumn::COUNT_ASTERISK); + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (dynamic_cast(rc)) + { + //@bug5229. handle constant function on aggregate argument + ac->constCol(SRCP(rc)); + break; + } + } + } + + // MySQL carelessly allows correlated aggregate function on the WHERE clause. + // Here is the work around to deal with that inconsistence. + // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; + ClauseType clauseType = gwi.clauseType; + + if (gwi.clauseType == WHERE) + gwi.clauseType = HAVING; + + // @bug 3603. for cases like max(rand()). try to build function first. 
+ if (!rc) + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + + parm.reset(rc); + gwi.clauseType = clauseType; + + if (gwi.fatalParseError) + break; + + break; + } + + case Item::REF_ITEM: + { + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (rc) + { + parm.reset(rc); + break; + } + } + + default: + { + gwi.fatalParseError = true; + //gwi.parseErrorText = "Non-supported Item in Aggregate function"; + } + } + + if (gwi.fatalParseError) + { + if (gwi.parseErrorText.empty()) + { + Message::Args args; + + if (item->name) + args.add(item->name); + else + args.add(""); + + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); + } + + if (ac) + delete ac; + return NULL; + } + if (parm) + { + // MCOL-1201 multi-argument aggregate + ac->aggParms().push_back(parm); + } + } } - } - else - { - ac->resultType(colType_MysqlToIDB(isp)); - } - // adjust decimal result type according to internalDecimalScale - if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) - { - CalpontSystemCatalog::ColType ct = ac->resultType(); - ct.scale = gwi.internalDecimalScale; - ac->resultType(ct); - } - - // check for same aggregate on the select list - ac->expressionId(ci->expressionId++); - - if (gwi.clauseType != SELECT) - { - for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + // Get result type + // Modified for MCOL-1201 multi-argument aggregate + if (ac->aggParms().size() > 0) { - if (*ac == gwi.returnedCols[i].get()) - ac->expressionId(gwi.returnedCols[i]->expressionId()); - } - } + // These are all one parm functions, so we can safely + // use the first parm for result type. + parm = ac->aggParms()[0]; + if (isp->sum_func() == Item_sum::AVG_FUNC || + isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); - // @bug5977 @note Temporary fix to avoid mysqld crash. 
The permanent fix will - // be applied in ExeMgr. When the ExeMgr fix is available, this checking - // will be taken out. - if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::DECIMAL; + ct.colWidth = 8; + ct.scale += 4; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::COUNT_FUNC || + isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = parm->resultType().scale; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_FUNC || + isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + ct.colDataType = CalpontSystemCatalog::BIGINT; + + // no break, let fall through + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + ct.colWidth = 8; + break; + + case CalpontSystemCatalog::UTINYINT: + case 
CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::UBIGINT; + ct.colWidth = 8; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::STD_FUNC || + isp->sum_func() == Item_sum::VARIANCE_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + ct.scale = 0; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = 0; + ct.precision = -16; // borrowed to indicate skip null value check on connector + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + //Item_func_group_concat* gc = (Item_func_group_concat*)isp; + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::VARCHAR; + ct.colWidth = isp->max_length; + ct.precision = 0; + ac->resultType(ct); + } + else + { + // UDAF result type will be set below. 
+ ac->resultType(parm->resultType()); + } + } + else + { + ac->resultType(colType_MysqlToIDB(isp)); + } + + // adjust decimal result type according to internalDecimalScale + if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) + { + CalpontSystemCatalog::ColType ct = ac->resultType(); + ct.scale = gwi.internalDecimalScale; + ac->resultType(ct); + } + + // check for same aggregate on the select list + ac->expressionId(ci->expressionId++); + + if (gwi.clauseType != SELECT) + { + for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + { + if (*ac == gwi.returnedCols[i].get()) + ac->expressionId(gwi.returnedCols[i]->expressionId()); + } + } + + // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will + // be applied in ExeMgr. When the ExeMgr fix is available, this checking + // will be taken out. + if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "No project column found for aggregate function"; + if (ac) + delete ac; + return NULL; + } + else if (ac->constCol()) + { + gwi.count_asterisk_list.push_back(ac); + } + + // For UDAF, populate the context and call the UDAF init() function. + // The return type is (should be) set in context by init(). + if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + { + UDAFColumn* udafc = dynamic_cast(ac); + + if (udafc) + { + mcsv1Context& context = udafc->getContext(); + context.setName(isp->func_name()); + + // Set up the return type defaults for the call to init() + context.setResultType(udafc->resultType().colDataType); + context.setColWidth(udafc->resultType().colWidth); + context.setScale(udafc->resultType().scale); + context.setPrecision(udafc->resultType().precision); + + context.setParamCount(udafc->aggParms().size()); + ColumnDatum colType; + ColumnDatum colTypes[udafc->aggParms().size()]; + // Build the column type vector. 
+ // Modified for MCOL-1201 multi-argument aggregate + for (uint32_t i = 0; i < udafc->aggParms().size(); ++i) + { + const execplan::CalpontSystemCatalog::ColType& resultType + = udafc->aggParms()[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; + } + + // Call the user supplied init() + mcsv1sdk::mcsv1_UDAF* udaf = context.getFunction(); + if (!udaf) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "Aggregate Function " + context.getName() + " doesn't exist in the ColumnStore engine"; + if (ac) + delete ac; + return NULL; + } + if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR) + { + gwi.fatalParseError = true; + gwi.parseErrorText = udafc->getContext().getErrorMessage(); + if (ac) + delete ac; + return NULL; + } + + // UDAF_OVER_REQUIRED means that this function is for Window + // Function only. Reject it here in aggregate land. + if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, + context.getName()); + if (ac) + delete ac; + return NULL; + } + + // Set the return type as set in init() + CalpontSystemCatalog::ColType ct; + ct.colDataType = context.getResultType(); + ct.colWidth = context.getColWidth(); + ct.scale = context.getScale(); + ct.precision = context.getPrecision(); + udafc->resultType(ct); + } + } + + } + catch (const std::logic_error& e) /* caught by const reference: no copy, no slicing; the message is reported through gwi.parseErrorText */ { gwi.fatalParseError = true; - gwi.parseErrorText = "No project column found for aggregate function"; + gwi.parseErrorText = "error building Aggregate Function: "; + gwi.parseErrorText += e.what(); + if (ac) + delete ac; return NULL; } - else if (ac->constCol()) + catch (...) 
{ - gwi.count_asterisk_list.push_back(ac); + gwi.fatalParseError = true; + gwi.parseErrorText = "error building Aggregate Function: Unspecified exception"; + if (ac) + delete ac; + return NULL; } - - // For UDAF, populate the context and call the UDAF init() function. - if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) - { - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) - { - mcsv1Context& context = udafc->getContext(); - context.setName(isp->func_name()); - - // Set up the return type defaults for the call to init() - context.setResultType(udafc->resultType().colDataType); - context.setColWidth(udafc->resultType().colWidth); - context.setScale(udafc->resultType().scale); - context.setPrecision(udafc->resultType().precision); - - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - - // Build the column type vector. For now, there is only one - colTypes.push_back(make_pair(udafc->functionParms()->alias(), udafc->functionParms()->resultType().colDataType)); - - // Call the user supplied init() - if (context.getFunction()->init(&context, colTypes) == mcsv1_UDAF::ERROR) - { - gwi.fatalParseError = true; - gwi.parseErrorText = udafc->getContext().getErrorMessage(); - return NULL; - } - - if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) - { - gwi.fatalParseError = true; - gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, - context.getName()); - return NULL; - } - - // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; - ct.colDataType = context.getResultType(); - ct.colWidth = context.getColWidth(); - ct.scale = context.getScale(); - ct.precision = context.getPrecision(); - udafc->resultType(ct); - } - } - return ac; } @@ -7839,7 +7909,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } 
std::vector::iterator funciter; @@ -9949,7 +10019,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } std::vector::iterator funciter; diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 1ee343e90..b39da4ea2 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -781,8 +781,11 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h //double double_val = *(double*)(&value); //f2->store(double_val); - if (f2->decimals() < (uint32_t)row.getScale(s)) - f2->dec = (uint32_t)row.getScale(s); + if ((f2->decimals() == DECIMAL_NOT_SPECIFIED && row.getScale(s) > 0) + || f2->decimals() < row.getScale(s)) + { + f2->dec = row.getScale(s); + } f2->store(dl); @@ -5275,8 +5278,6 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE execplan::CalpontSelectExecutionPlan::ColumnMap::iterator colMapIter; execplan::CalpontSelectExecutionPlan::ColumnMap::iterator condColMapIter; execplan::ParseTree* ptIt; - execplan::ReturnedColumn* rcIt; - for (TABLE_LIST* tl = gi.groupByTables; tl; tl = tl->next_local) { mapiter = ci->tableMap.find(tl->table); diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 4b648cb15..8d68a6260 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -340,6 +340,7 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n ac->distinct(item_sum->has_with_distinct()); Window_spec* win_spec = wf->window_spec; SRCP srcp; + CalpontSystemCatalog::ColType ct; // For return type // arguments vector funcParms; @@ -370,18 +371,25 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n context.setColWidth(rt.colWidth); context.setScale(rt.scale); context.setPrecision(rt.precision); + 
context.setParamCount(funcParms.size()); + + mcsv1sdk::ColumnDatum colType; + mcsv1sdk::ColumnDatum colTypes[funcParms.size()]; // Turn on the Analytic flag so the function is aware it is being called // as a Window Function. context.setContextFlag(CONTEXT_IS_ANALYTIC); - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - // Build the column type vector. + // Modified for MCOL-1201 multi-argument aggregate for (size_t i = 0; i < funcParms.size(); ++i) { - colTypes.push_back(make_pair(funcParms[i]->alias(), funcParms[i]->resultType().colDataType)); + const execplan::CalpontSystemCatalog::ColType& resultType + = funcParms[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; } // Call the user supplied init() @@ -401,7 +409,6 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n } // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; ct.colDataType = context.getResultType(); ct.colWidth = context.getColWidth(); ct.scale = context.getScale(); @@ -419,10 +426,10 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n { case Item_sum::UDF_SUM_FUNC: { - uint64_t bIgnoreNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)); - char sIgnoreNulls[18]; - sprintf(sIgnoreNulls, "%lu", bIgnoreNulls); - srcp.reset(new ConstantColumn(sIgnoreNulls, (uint64_t)bIgnoreNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 1 => RESPECT + uint64_t bRespectNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) ? 0 : 1; + char sRespectNulls[18]; + sprintf(sRespectNulls, "%lu", bRespectNulls); + srcp.reset(new ConstantColumn(sRespectNulls, (uint64_t)bRespectNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 
1 => RESPECT funcParms.push_back(srcp); break; } @@ -881,11 +888,13 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n return NULL; } - ac->resultType(colType_MysqlToIDB(item_sum)); - - // bug5736. Make the result type double for some window functions when - // infinidb_double_for_decimal_math is set. - ac->adjustResultType(); + if (item_sum->sum_func() != Item_sum::UDF_SUM_FUNC) + { + ac->resultType(colType_MysqlToIDB(item_sum)); + // bug5736. Make the result type double for some window functions when + // infinidb_double_for_decimal_math is set. + ac->adjustResultType(); + } ac->expressionId(ci->expressionId++); diff --git a/utils/common/any.hpp b/utils/common/any.hpp index be0ca679b..5408c5c87 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -9,123 +9,142 @@ * http://www.boost.org/LICENSE_1_0.txt */ +#include #include namespace static_any { namespace anyimpl { + struct bad_any_cast + { + }; - struct bad_any_cast - { - }; + struct empty_any + { + }; - struct empty_any - { - }; + struct base_any_policy + { + virtual void static_delete(void** x) = 0; + virtual void copy_from_value(void const* src, void** dest) = 0; + virtual void clone(void* const* src, void** dest) = 0; + virtual void move(void* const* src, void** dest) = 0; + virtual void* get_value(void** src) = 0; + virtual size_t get_size() = 0; + }; - struct base_any_policy - { - virtual void static_delete(void** x) = 0; - virtual void copy_from_value(void const* src, void** dest) = 0; - virtual void clone(void* const* src, void** dest) = 0; - virtual void move(void* const* src, void** dest) = 0; - virtual void* get_value(void** src) = 0; - virtual size_t get_size() = 0; - }; + template + struct typed_base_any_policy : base_any_policy + { + virtual size_t get_size() + { + return sizeof(T); + } + }; - template - struct typed_base_any_policy : base_any_policy - { - virtual size_t get_size() { return sizeof(T); } - }; + template + struct small_any_policy : 
typed_base_any_policy + { + virtual void static_delete(void** x) + { + } + virtual void copy_from_value(void const* src, void** dest) + { + new(dest) T(*reinterpret_cast(src)); + } + virtual void clone(void* const* src, void** dest) + { + *dest = *src; + } + virtual void move(void* const* src, void** dest) + { + *dest = *src; + } + virtual void* get_value(void** src) + { + return reinterpret_cast(src); + } + }; - template - struct small_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) { } - virtual void copy_from_value(void const* src, void** dest) - { new(dest) T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) { *dest = *src; } - virtual void move(void* const* src, void** dest) { *dest = *src; } - virtual void* get_value(void** src) { return reinterpret_cast(src); } - }; - - template - struct big_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) + template + struct big_any_policy : typed_base_any_policy + { + virtual void static_delete(void** x) { if (*x) - delete(*reinterpret_cast(x)); + delete(*reinterpret_cast(x)); *x = NULL; } - virtual void copy_from_value(void const* src, void** dest) + virtual void copy_from_value(void const* src, void** dest) { - *dest = new T(*reinterpret_cast(src)); + *dest = new T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) + virtual void clone(void* const* src, void** dest) { - *dest = new T(**reinterpret_cast(src)); + *dest = new T(**reinterpret_cast(src)); } - virtual void move(void* const* src, void** dest) + virtual void move(void* const* src, void** dest) { - (*reinterpret_cast(dest))->~T(); - **reinterpret_cast(dest) = **reinterpret_cast(src); + (*reinterpret_cast(dest))->~T(); + **reinterpret_cast(dest) = **reinterpret_cast(src); } - virtual void* get_value(void** src) { return *src; } - }; + virtual void* get_value(void** src) + { + return *src; + } + }; - template - struct choose_policy - { - typedef 
big_any_policy type; - }; + template + struct choose_policy + { + typedef big_any_policy type; + }; - template - struct choose_policy - { - typedef small_any_policy type; - }; + template + struct choose_policy + { + typedef small_any_policy type; + }; - struct any; + struct any; - /// Choosing the policy for an any type is illegal, but should never happen. - /// This is designed to throw a compiler error. - template<> - struct choose_policy - { - typedef void type; - }; + /// Choosing the policy for an any type is illegal, but should never happen. + /// This is designed to throw a compiler error. + template<> + struct choose_policy + { + typedef void type; + }; - /// Specializations for small types. - #define SMALL_POLICY(TYPE) template<> struct \ - choose_policy { typedef small_any_policy type; }; + /// Specializations for small types. +#define SMALL_POLICY(TYPE) template<> struct \ + choose_policy { typedef small_any_policy type; }; - SMALL_POLICY(char); - SMALL_POLICY(signed char); - SMALL_POLICY(unsigned char); - SMALL_POLICY(signed short); - SMALL_POLICY(unsigned short); - SMALL_POLICY(signed int); - SMALL_POLICY(unsigned int); - SMALL_POLICY(signed long); - SMALL_POLICY(unsigned long); - SMALL_POLICY(signed long long); - SMALL_POLICY(unsigned long long); - SMALL_POLICY(float); - SMALL_POLICY(double); - SMALL_POLICY(bool); + SMALL_POLICY(char); + SMALL_POLICY(signed char); + SMALL_POLICY(unsigned char); + SMALL_POLICY(signed short); + SMALL_POLICY(unsigned short); + SMALL_POLICY(signed int); + SMALL_POLICY(unsigned int); + SMALL_POLICY(signed long); + SMALL_POLICY(unsigned long); + SMALL_POLICY(signed long long); + SMALL_POLICY(unsigned long long); + SMALL_POLICY(float); + SMALL_POLICY(double); + SMALL_POLICY(bool); - #undef SMALL_POLICY +#undef SMALL_POLICY - /// This function will return a different policy for each type. 
- template - base_any_policy* get_policy() - { - static typename choose_policy::type policy; - return &policy; - }; + /// This function will return a different policy for each type. + template + base_any_policy* get_policy() + { + static typename choose_policy::type policy; + return &policy; + }; } class any @@ -139,37 +158,40 @@ public: /// Initializing constructor. template any(const T& x) - : policy(anyimpl::get_policy()), object(NULL) + : policy(anyimpl::get_policy()), object(NULL) { assign(x); } /// Empty constructor. any() - : policy(anyimpl::get_policy()), object(NULL) - { } + : policy(anyimpl::get_policy()), object(NULL) + { + } /// Special initializing constructor for string literals. any(const char* x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Copy constructor. any(const any& x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Destructor. - ~any() { + ~any() + { policy->static_delete(&object); } /// Assignment function from another any. - any& assign(const any& x) { + any& assign(const any& x) + { reset(); policy = x.policy; policy->clone(&x.object, &object); @@ -178,7 +200,8 @@ public: /// Assignment function. template - any& assign(const T& x) { + any& assign(const T& x) + { reset(); policy = anyimpl::get_policy(); policy->copy_from_value(&x, &object); @@ -197,8 +220,42 @@ public: return assign(x); } + /// Less than operator for sorting + bool operator<(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) < 0 ? 
1 : 0; + } + return 0; + } + + /// equal operator + bool operator==(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) == 0 ? 1 : 0; + } + return 0; + } + /// Utility functions - any& swap(any& x) { + uint8_t getHash() const + { + void* p1 = const_cast(object); + return *(uint64_t*)policy->get_value(&p1) % 4048; + } + any& swap(any& x) + { std::swap(policy, x.policy); std::swap(object, x.object); return *this; @@ -206,27 +263,32 @@ public: /// Cast operator. You can only cast to the original type. template - T& cast() { - if (policy != anyimpl::get_policy()) + T& cast() + { + if (policy != anyimpl::get_policy()) throw anyimpl::bad_any_cast(); T* r = reinterpret_cast(policy->get_value(&object)); return *r; } /// Returns true if the any contains no value. - bool empty() const { + bool empty() const + { return policy == anyimpl::get_policy(); } /// Frees any allocated memory, and sets the value to NULL. - void reset() { + void reset() + { policy->static_delete(&object); policy = anyimpl::get_policy(); } /// Returns true if the two types are the same. 
- bool compatible(const any& x) const { + bool compatible(const any& x) const + { return policy == x.policy; } }; + } diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 8d110cfc8..c1f5bbd63 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -215,6 +215,22 @@ inline string getStringNullValue() namespace rowgroup { +const std::string typeStr(""); +const static_any::any& RowAggregation::charTypeId((char)1); +const static_any::any& RowAggregation::scharTypeId((signed char)1); +const static_any::any& RowAggregation::shortTypeId((short)1); +const static_any::any& RowAggregation::intTypeId((int)1); +const static_any::any& RowAggregation::longTypeId((long)1); +const static_any::any& RowAggregation::llTypeId((long long)1); +const static_any::any& RowAggregation::ucharTypeId((unsigned char)1); +const static_any::any& RowAggregation::ushortTypeId((unsigned short)1); +const static_any::any& RowAggregation::uintTypeId((unsigned int)1); +const static_any::any& RowAggregation::ulongTypeId((unsigned long)1); +const static_any::any& RowAggregation::ullTypeId((unsigned long long)1); +const static_any::any& RowAggregation::floatTypeId((float)1); +const static_any::any& RowAggregation::doubleTypeId((double)1); +const static_any::any& RowAggregation::strTypeId(typeStr); + KeyStorage::KeyStorage(const RowGroup& keys, Row** tRow) : tmpRow(tRow), rg(keys) { RGData data(rg); @@ -691,7 +707,8 @@ RowAggregation::RowAggregation(const vector& rowAggGroupByCol RowAggregation::RowAggregation(const RowAggregation& rhs): fAggMapPtr(NULL), fRowGroupOut(NULL), fTotalRowCount(0), fMaxTotalRowCount(AGG_ROWGROUP_SIZE), - fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0) + fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0), + fRGContext(rhs.fRGContext) { //fGroupByCols.clear(); //fFunctionCols.clear(); @@ -756,7 +773,6 @@ void RowAggregation::addRowGroup(const RowGroup* pRows, vector& in { // this function is 
for threaded aggregation, which is for group by and distinct. // if (countSpecial(pRows)) - Row rowIn; pRows->initRow(&rowIn); @@ -790,7 +806,7 @@ void RowAggregation::setJoinRowGroups(vector* pSmallSideRG, RowGroup* } //------------------------------------------------------------------------------ -// For UDAF, we need to sometimes start a new context. +// For UDAF, we need to sometimes start a new fRGContext. // // This will be called any number of times by each of the batchprimitiveprocessor // threads on the PM and by multple threads on the UM. It must remain @@ -801,29 +817,29 @@ void RowAggregation::resetUDAF(uint64_t funcColID) // Get the UDAF class pointer and store in the row definition object. RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColID].get()); - // resetUDAF needs to be re-entrant. Since we're modifying the context object - // by creating a new userData, we need a local copy. The copy constructor - // doesn't copy userData. - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + // RowAggregation and it's functions need to be re-entrant which means + // each instance (thread) needs its own copy of the context object. + // Note: operator=() doesn't copy userData. + fRGContext = rowUDAF->fUDAFContext; // Call the user reset for the group userData. Since, at this point, // context's userData will be NULL, reset will generate a new one. 
mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->reset(&rgContext); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } fRow.setUserDataStore(fRowGroupOut->getRGData()->getUserDataStore()); - fRow.setUserData(rgContext, - rgContext.getUserDataSP(), - rgContext.getUserDataSize(), + fRow.setUserData(fRGContext, + fRGContext.getUserDataSP(), + fRGContext.getUserDataSize(), rowUDAF->fAuxColumnIndex); - rgContext.setUserData(NULL); // Prevents calling deleteUserData on the context. + fRGContext.setUserData(NULL); // Prevents calling deleteUserData on the fRGContext. } //------------------------------------------------------------------------------ @@ -873,7 +889,6 @@ void RowAggregation::initialize() } } - // Save the RowGroup data pointer fResultDataVec.push_back(fRowGroupOut->getRGData()); @@ -1658,10 +1673,11 @@ void RowAggregation::updateEntry(const Row& rowIn) { for (uint64_t i = 0; i < fFunctionCols.size(); i++) { - int64_t colIn = fFunctionCols[i]->fInputColumnIndex; - int64_t colOut = fFunctionCols[i]->fOutputColumnIndex; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[i]; + int64_t colIn = pFunctionCol->fInputColumnIndex; + int64_t colOut = pFunctionCol->fOutputColumnIndex; - switch (fFunctionCols[i]->fAggFunction) + switch (pFunctionCol->fAggFunction) { case ROWAGG_COUNT_COL_NAME: @@ -1675,7 +1691,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_MIN: case ROWAGG_MAX: case ROWAGG_SUM: - doMinMaxSum(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction); + doMinMaxSum(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; case ROWAGG_AVG: @@ -1692,7 +1708,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_BIT_OR: case ROWAGG_BIT_XOR: { - doBitOp(rowIn, 
colIn, colOut, fFunctionCols[i]->fAggFunction); + doBitOp(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; } @@ -1707,11 +1723,11 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); + RowUDAFFunctionCol* rowUDAF = dynamic_cast(pFunctionCol.get()); if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF); + doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF, i); } else { @@ -1725,7 +1741,7 @@ void RowAggregation::updateEntry(const Row& rowIn) { std::ostringstream errmsg; errmsg << "RowAggregation: function (id = " << - (uint64_t) fFunctionCols[i]->fAggFunction << ") is not supported."; + (uint64_t) pFunctionCol->fAggFunction << ") is not supported."; cerr << errmsg.str() << endl; throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); break; @@ -1997,131 +2013,142 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu } void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { - std::vector valsIn; - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupIn.getColTypes()[colIn]; - std::vector dataFlags; + int32_t paramCount = fRGContext.getParameterCount(); + // The vector of parameters to be sent to the UDAF + mcsv1sdk::ColumnDatum valsIn[paramCount]; + uint32_t dataFlags[paramCount]; - // Get the context for this rowGroup. Make a copy so we're thread safe. 
- mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); - - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - - if (isNull(&fRowGroupIn, rowIn, colIn) == true) + execplan::CalpontSystemCatalog::ColDataType colDataType; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + if (isNull(&fRowGroupIn, rowIn, colIn) == true) { - return; + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + { + return; + } + dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; + } + + colDataType = fRowGroupIn.getColTypes()[colIn]; + if (!fRGContext.isParamNull(i)) + { + switch (colDataType) + { + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + break; + } + + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UBIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + { + datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; + datum.columnData = rowIn.getDoubleField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + { + 
datum.dataType = execplan::CalpontSystemCatalog::FLOAT; + datum.columnData = rowIn.getFloatField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::TIME: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + { + datum.dataType = colDataType; + datum.columnData = rowIn.getStringField(colIn); + break; + } + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation " << fRGContext.getName() << + ": No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } } - flag |= mcsv1sdk::PARAM_IS_NULL; - } - - flags.push_back(flag); - rgContext.setDataFlags(&flags); - - mcsv1sdk::ColumnDatum datum; - - if (!rgContext.isParamNull(0)) - { - switch (colDataType) + // MCOL-1201: If there are multiple parameters, the next fFunctionCols + // will have the column used. By incrementing the funcColsIdx (passed by + // ref, we also increment the caller's index. 
+ if (fFunctionCols.size() > funcColsIdx + 1 + && fFunctionCols[funcColsIdx+1]->fAggFunction == ROWAGG_MULTI_PARM) { - case execplan::CalpontSystemCatalog::TINYINT: - case execplan::CalpontSystemCatalog::SMALLINT: - case execplan::CalpontSystemCatalog::MEDINT: - case execplan::CalpontSystemCatalog::INT: - case execplan::CalpontSystemCatalog::BIGINT: - case execplan::CalpontSystemCatalog::DECIMAL: - case execplan::CalpontSystemCatalog::UDECIMAL: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - datum.scale = fRowGroupIn.getScale()[colIn]; - datum.precision = fRowGroupIn.getPrecision()[colIn]; - break; - } - - case execplan::CalpontSystemCatalog::UTINYINT: - case execplan::CalpontSystemCatalog::USMALLINT: - case execplan::CalpontSystemCatalog::UMEDINT: - case execplan::CalpontSystemCatalog::UINT: - case execplan::CalpontSystemCatalog::UBIGINT: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DOUBLE: - case execplan::CalpontSystemCatalog::UDOUBLE: - { - datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; - datum.columnData = rowIn.getDoubleField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::FLOAT: - case execplan::CalpontSystemCatalog::UFLOAT: - { - datum.dataType = execplan::CalpontSystemCatalog::FLOAT; - datum.columnData = rowIn.getFloatField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DATE: - case execplan::CalpontSystemCatalog::DATETIME: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::TIME: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::CHAR: - case execplan::CalpontSystemCatalog::VARCHAR: - case 
execplan::CalpontSystemCatalog::TEXT: - case execplan::CalpontSystemCatalog::VARBINARY: - case execplan::CalpontSystemCatalog::CLOB: - case execplan::CalpontSystemCatalog::BLOB: - { - datum.dataType = colDataType; - datum.columnData = rowIn.getStringField(colIn); - break; - } - - default: - { - std::ostringstream errmsg; - errmsg << "RowAggregation " << rgContext.getName() << - ": No logic for data type: " << colDataType; - throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); - break; - } + ++funcColsIdx; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; + colIn = pFunctionCol->fInputColumnIndex; + colOut = pFunctionCol->fOutputColumnIndex; + } + else + { + break; } } - valsIn.push_back(datum); - // The intermediate values are stored in userData referenced by colAux. - rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setDataFlags(dataFlags); + fRGContext.setUserData(fRow.getUserData(colAux)); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->nextValue(&rgContext, valsIn); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -2218,6 +2245,7 @@ RowAggregationUM::RowAggregationUM(const RowAggregationUM& rhs) : fHasAvg(rhs.fHasAvg), fKeyOnHeap(rhs.fKeyOnHeap), fHasStatsFunc(rhs.fHasStatsFunc), + fHasUDAF(rhs.fHasUDAF), fExpression(rhs.fExpression), fTotalMemUsage(rhs.fTotalMemUsage), fRm(rhs.fRm), @@ -2419,7 +2447,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -2585,22 +2613,6 @@ void RowAggregationUM::calculateAvgColumns() // Sets the value 
from valOut into column colOut, performing any conversions. void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) { - static const static_any::any& charTypeId((char)1); - static const static_any::any& scharTypeId((signed char)1); - static const static_any::any& shortTypeId((short)1); - static const static_any::any& intTypeId((int)1); - static const static_any::any& longTypeId((long)1); - static const static_any::any& llTypeId((long long)1); - static const static_any::any& ucharTypeId((unsigned char)1); - static const static_any::any& ushortTypeId((unsigned short)1); - static const static_any::any& uintTypeId((unsigned int)1); - static const static_any::any& ulongTypeId((unsigned long)1); - static const static_any::any& ullTypeId((unsigned long long)1); - static const static_any::any& floatTypeId((float)1); - static const static_any::any& doubleTypeId((double)1); - static const std::string typeStr(""); - static const static_any::any& strTypeId(typeStr); - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; if (valOut.empty()) @@ -2609,6 +2621,179 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) return; } + int64_t intOut = 0; + uint64_t uintOut = 0; + float floatOut = 0.0; + double doubleOut = 0.0; + ostringstream oss; + std::string strOut; + + bool bSetSuccess = false; + switch (colDataType) + { + case execplan::CalpontSystemCatalog::BIT: + case execplan::CalpontSystemCatalog::TINYINT: + if (valOut.compatible(charTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(scharTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<1>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + if (valOut.compatible(shortTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<2>(intOut, colOut); + bSetSuccess = true; 
+ } + break; + + case execplan::CalpontSystemCatalog::INT: + if (valOut.compatible(uintTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(longTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<4>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + if (valOut.compatible(llTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<8>(intOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UTINYINT: + if (valOut.compatible(ucharTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<1>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + if (valOut.compatible(ushortTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<2>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UINT: + if (valOut.compatible(uintTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<4>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UBIGINT: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + if (valOut.compatible(floatTypeId)) + { + floatOut = valOut.cast(); + fRow.setFloatField(floatOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DOUBLE: + case 
execplan::CalpontSystemCatalog::UDOUBLE: + if (valOut.compatible(doubleTypeId)) + { + doubleOut = valOut.cast(); + fRow.setDoubleField(doubleOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setStringField(strOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setVarBinaryField(strOut, colOut); + bSetSuccess = true; + } + break; + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation: No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } + if (!bSetSuccess) + { + SetUDAFAnyValue(valOut, colOut); + } +} + +void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut) +{ + execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; + // This may seem a bit convoluted. Users shouldn't return a type // that they didn't set in mcsv1_UDAF::init(), but this // handles whatever return type is given and casts @@ -2814,7 +2999,7 @@ void RowAggregationUM::calculateUDAFColumns() continue; rowUDAF = dynamic_cast(fFunctionCols[i].get()); - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + fRGContext = rowUDAF->fUDAFContext; int64_t colOut = rowUDAF->fOutputColumnIndex; int64_t colAux = rowUDAF->fAuxColumnIndex; @@ -2826,26 +3011,26 @@ void RowAggregationUM::calculateUDAFColumns() fRowGroupOut->getRow(j, &fRow); // Turn the NULL flag off. We can't know NULL at this point - rgContext.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF evaluate function mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->evaluate(&rgContext, valOut); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); } - rgContext.setUserData(NULL); + fRGContext.setUserData(NULL); } } @@ -3116,54 +3301,60 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u { // For a NULL constant, call nextValue with NULL and then evaluate. bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. 
- rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } - +#if 0 + uint32_t dataFlags[fRGContext.getParameterCount()]; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + { + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + } +#endif // Turn the NULL and CONSTANT flags on. - std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a dummy datum - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = execplan::CalpontSystemCatalog::BIGINT; datum.columnData = 0; - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), 
logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3460,30 +3651,28 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData case ROWAGG_UDAF: { bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. - rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Turn the CONSTANT flags on. 
- std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a datum item for sending to UDAF - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = (CalpontSystemCatalog::ColDataType)colDataType; switch (colDataType) @@ -3567,27 +3756,27 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData break; } - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3806,7 +3995,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -4011,45 +4200,43 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance 
//------------------------------------------------------------------------------ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { static_any::any valOut; - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); // Get the user data boost::shared_ptr userData = rowIn.getUserData(colIn + 1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. - std::vector flags; - uint32_t flag = 0; + uint32_t flags[1]; + flags[0] = 0; if (!userData) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { return; } // Turn on NULL flags - flag |= mcsv1sdk::PARAM_IS_NULL; + flags[0] |= mcsv1sdk::PARAM_IS_NULL; } - flags.push_back(flag); - rgContext.setDataFlags(&flags); + fRGContext.setDataFlags(flags); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF subEvaluate method mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->subEvaluate(&rgContext, userData.get()); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userData.get()); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::IDBExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -4246,7 +4433,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index b6294f193..282f354fc 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -110,6 +110,9 @@ enum RowAggFunctionType // User Defined Aggregate Function ROWAGG_UDAF, + // If an Aggregate has more than one parameter, this will be used for parameters after the first + ROWAGG_MULTI_PARM, + // internal function type to avoid duplicate the work // handling ROWAGG_COUNT_NO_OP, ROWAGG_DUP_FUNCT and ROWAGG_DUP_AVG is a little different // ROWAGG_COUNT_NO_OP : count done by AVG, no need to copy @@ -583,7 +586,7 @@ protected: virtual void doAvg(const Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); - virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); @@ -660,6 +663,25 @@ 
protected: //need access to rowgroup storage holding the rows to hash & ==. friend class AggHasher; friend class AggComparator; + + // We need a separate copy for each thread. + mcsv1sdk::mcsv1Context fRGContext; + + // These are handy for testing the actual type of static_any for UDAF + static const static_any::any& charTypeId; + static const static_any::any& scharTypeId; + static const static_any::any& shortTypeId; + static const static_any::any& intTypeId; + static const static_any::any& longTypeId; + static const static_any::any& llTypeId; + static const static_any::any& ucharTypeId; + static const static_any::any& ushortTypeId; + static const static_any::any& uintTypeId; + static const static_any::any& ulongTypeId; + static const static_any::any& ullTypeId; + static const static_any::any& floatTypeId; + static const static_any::any& doubleTypeId; + static const static_any::any& strTypeId; }; //------------------------------------------------------------------------------ @@ -783,6 +805,9 @@ protected: // Sets the value from valOut into column colOut, performing any conversions. void SetUDAFValue(static_any::any& valOut, int64_t colOut); + // If the datatype returned by evaluate isn't what we expect, convert. + void SetUDAFAnyValue(static_any::any& valOut, int64_t colOut); + // calculate the UDAF function all rows received. UM only function. 
void calculateUDAFColumns(); @@ -877,7 +902,7 @@ protected: void doStatistics(const Row&, int64_t, int64_t, int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); - void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); bool countSpecial(const RowGroup* pRG) { return false; diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index e69ff4d88..01009e35a 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/udfsdk/allnull.cpp b/utils/udfsdk/allnull.cpp index b6b8d79da..247b9e28f 100644 --- a/utils/udfsdk/allnull.cpp +++ b/utils/udfsdk/allnull.cpp @@ -27,11 +27,11 @@ struct allnull_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode allnull::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { context->setUserDataSize(sizeof(allnull_data)); - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -52,8 +52,7 @@ mcsv1_UDAF::ReturnCode allnull::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { struct allnull_data* data = (struct allnull_data*)context->getUserData()->data; diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h index 86697b052..da17f5d6b 100644 
--- a/utils/udfsdk/allnull.h +++ b/utils/udfsdk/allnull.h @@ -103,7 +103,7 @@ public: * colTypes or wrong number of arguments. Else return * mcsv1_UDAF::SUCCESS. */ - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); /** * reset() @@ -138,7 +138,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() diff --git a/utils/udfsdk/avg_mode.cpp b/utils/udfsdk/avg_mode.cpp index f39b5e402..5429183d9 100644 --- a/utils/udfsdk/avg_mode.cpp +++ b/utils/udfsdk/avg_mode.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("avg_mode() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode avg_mode::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; @@ -187,8 +186,7 @@ mcsv1_UDAF::ReturnCode 
avg_mode::evaluate(mcsv1Context* context, static_any::any return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/avg_mode.h b/utils/udfsdk/avg_mode.h index 4f3442005..5722c5fea 100644 --- a/utils/udfsdk/avg_mode.h +++ b/utils/udfsdk/avg_mode.h @@ -18,7 +18,7 @@ /*********************************************************************** * $Id$ * -* mcsv1_UDAF.h +* avg_mode.h ***********************************************************************/ /** @@ -50,8 +50,8 @@ * is also used to describe the interface that is used for * either. */ -#ifndef HEADER_mode -#define HEADER_mode +#ifndef HEADER_avg_mode +#define HEADER_avg_mode #include #include @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index 349a642ec..ee08dcc07 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -36,6 +36,8 @@ UDAF_MAP UDAFMap::fm; #include "ssq.h" #include "median.h" #include "avg_mode.h" +#include "regr_avgx.h" +#include "avgx.h" UDAF_MAP& UDAFMap::getMap() { if (fm.size() > 0) @@ -52,6 +54,8 @@ UDAF_MAP& UDAFMap::getMap() fm["ssq"] = new ssq(); fm["median"] = new median(); fm["avg_mode"] = new avg_mode(); + fm["regr_avgx"] = new regr_avgx(); + fm["avgx"] = new avgx(); return fm; } @@ -115,8 +119,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const { // We don't test the per row data fields. They don't determine // if it's the same Context. - if (getName() != c.getName() - || fRunFlags != c.fRunFlags + if (getName() != c.getName() + ||fRunFlags != c.fRunFlags || fContextFlags != c.fContextFlags || fUserDataSize != c.fUserDataSize || fResultType != c.fResultType @@ -125,7 +129,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const || fStartFrame != c.fStartFrame || fEndFrame != c.fEndFrame || fStartConstant != c.fStartConstant - || fEndConstant != c.fEndConstant) + || fEndConstant != c.fEndConstant + || fParamCount != c.fParamCount) return false; return true; @@ -217,6 +222,7 @@ void mcsv1Context::serialize(messageqcpp::ByteStream& b) const b << (uint32_t)fEndFrame; b << fStartConstant; b << fEndConstant; + b << fParamCount; } void mcsv1Context::unserialize(messageqcpp::ByteStream& b) @@ -238,6 +244,7 @@ void mcsv1Context::unserialize(messageqcpp::ByteStream& b) fEndFrame = (WF_FRAME)frame; b >> fStartConstant; b >> fEndConstant; + b >> fParamCount; } void UserData::serialize(messageqcpp::ByteStream& bs) const diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h 
index d24852c28..df3f47649 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -77,6 +77,7 @@ #include "any.hpp" #include "calpontsystemcatalog.h" #include "wf_frame.h" +#include "my_decimal_limits.h" using namespace execplan; @@ -200,12 +201,8 @@ static uint64_t CONTEXT_IS_PM __attribute__ ((unused)) = 1 << 2; // Flags that describe the contents of a specific input parameter // These will be set in context->dataFlags for each method call by the framework. // User code shouldn't use these directly -static uint64_t PARAM_IS_NULL __attribute__ ((unused)) = 1; -static uint64_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; - -// shorthand for the list of columns in the call sent to init() -// first is the actual column name and second is the data type in Columnstore. -typedef std::vector >COL_TYPES; +static uint32_t PARAM_IS_NULL __attribute__ ((unused)) = 1; +static uint32_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; // This is the context class that is passed to all API callbacks // The framework potentially sets data here for each invocation of @@ -269,7 +266,9 @@ public: EXPORT bool isPM(); // Parameter refinement description accessors - // valid in nextValue and dropValue + + // How many actual parameters were entered. + // valid in all calls size_t getParameterCount() const; // Determine if an input parameter is NULL @@ -298,6 +297,7 @@ public: // This only makes sense if the return type is decimal, but should be set // to (0, -1) for other types if the inout is decimal. // valid in init() + // Set the scale to DECIMAL_NOT_SPECIFIED if you want a floating decimal. 
EXPORT bool setScale(int32_t scale); EXPORT bool setPrecision(int32_t precision); @@ -372,7 +372,7 @@ private: int32_t fResultscale; // For scale, the number of digits to the right of the decimal int32_t fResultPrecision; // The max number of digits allowed in the decimal value std::string errorMsg; - std::vector* dataFlags; // one entry for each parameter + uint32_t* dataFlags; // an integer array with one entry for each parameter bool* bInterrupted; // Gets set to true by the Framework if something happens WF_FRAME fStartFrame; // Is set to default to start, then modified by the actual frame in the call WF_FRAME fEndFrame; // Is set to default to start, then modified by the actual frame in the call @@ -380,6 +380,7 @@ private: int32_t fEndConstant; // for end frame WF_PRECEEDIMG or WF_FOLLOWING std::string functionName; mcsv1sdk::mcsv1_UDAF* func; + int32_t fParamCount; public: // For use by the framework @@ -394,13 +395,14 @@ public: EXPORT void clearContextFlag(uint64_t flag); EXPORT uint64_t getContextFlags() const; EXPORT uint32_t getUserDataSize() const; - EXPORT std::vector& getDataFlags(); - EXPORT void setDataFlags(std::vector* flags); + EXPORT uint32_t* getDataFlags(); + EXPORT void setDataFlags(uint32_t* flags); EXPORT void setInterrupted(bool interrupted); EXPORT void setInterrupted(bool* interrupted); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction(); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const; EXPORT boost::shared_ptr getUserDataSP(); + EXPORT void setParamCount(int32_t paramCount); }; // Since aggregate functions can operate on any data type, we use the following structure @@ -419,9 +421,10 @@ public: struct ColumnDatum { CalpontSystemCatalog::ColDataType dataType; // defined in calpontsystemcatalog.h - static_any::any columnData; + static_any::any columnData; // Not valid in init() uint32_t scale; // If dataType is a DECIMAL type uint32_t precision; // If dataType is a DECIMAL type + std::string alias; // Only filled in for init() ColumnDatum() :
dataType(CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1) {}; }; @@ -466,7 +469,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes) = 0; + ColumnDatum* colTypes) = 0; /** * reset() @@ -501,8 +504,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn) = 0; + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn) = 0; /** * subEvaluate() @@ -579,8 +581,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() @@ -640,32 +641,32 @@ inline mcsv1Context::mcsv1Context() : fEndFrame(WF_CURRENT_ROW), fStartConstant(0), fEndConstant(0), - func(NULL) + func(NULL), + fParamCount(0) { } inline mcsv1Context::mcsv1Context(const mcsv1Context& rhs) : - fContextFlags(0), - fColWidth(0), - dataFlags(NULL), - bInterrupted(NULL), - func(NULL) + dataFlags(NULL) { copy(rhs); } inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs) { - fRunFlags = rhs.getRunFlags(); - fResultType = rhs.getResultType(); - fUserDataSize = rhs.getUserDataSize(); - fResultscale = rhs.getScale(); - fResultPrecision = rhs.getPrecision(); + fRunFlags = rhs.fRunFlags; + fContextFlags = rhs.fContextFlags; + fResultType = rhs.fResultType; + fUserDataSize = rhs.fUserDataSize; + fColWidth = rhs.fColWidth; + fResultscale = rhs.fResultscale; + fResultPrecision = rhs.fResultPrecision; rhs.getStartFrame(fStartFrame, fStartConstant); rhs.getEndFrame(fEndFrame, fEndConstant); - functionName = rhs.getName(); - bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference - func = rhs.func; + functionName = rhs.functionName; + bInterrupted = rhs.bInterrupted; // Multiple 
threads will use the same reference + func = rhs.func; + fParamCount = rhs.fParamCount; return *this; } @@ -675,11 +676,7 @@ inline mcsv1Context::~mcsv1Context() inline mcsv1Context& mcsv1Context::operator=(const mcsv1Context& rhs) { - fContextFlags = 0; - fColWidth = 0; dataFlags = NULL; - bInterrupted = NULL; - func = NULL; return copy(rhs); } @@ -753,16 +750,13 @@ inline bool mcsv1Context::isPM() inline size_t mcsv1Context::getParameterCount() const { - if (dataFlags) - return dataFlags->size(); - - return 0; + return fParamCount; } inline bool mcsv1Context::isParamNull(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_NULL; + return dataFlags[paramIdx] & PARAM_IS_NULL; return false; } @@ -770,7 +764,7 @@ inline bool mcsv1Context::isParamNull(int paramIdx) inline bool mcsv1Context::isParamConstant(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_CONSTANT; + return dataFlags[paramIdx] & PARAM_IS_CONSTANT; return false; } @@ -939,18 +933,22 @@ inline uint32_t mcsv1Context::getUserDataSize() const return fUserDataSize; } -inline std::vector& mcsv1Context::getDataFlags() +inline uint32_t* mcsv1Context::getDataFlags() { - return *dataFlags; + return dataFlags; } -inline void mcsv1Context::setDataFlags(std::vector* flags) +inline void mcsv1Context::setDataFlags(uint32_t* flags) { dataFlags = flags; } -inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, - std::vector& valsDropped) +inline void mcsv1Context::setParamCount(int32_t paramCount) +{ + fParamCount = paramCount; +} + +inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { return NOT_IMPLEMENTED; } diff --git a/utils/udfsdk/median.cpp b/utils/udfsdk/median.cpp index e32d721f1..9c7e72dc3 100644 --- a/utils/udfsdk/median.cpp +++ b/utils/udfsdk/median.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* 
colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("median() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode median::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; @@ -212,8 +211,7 @@ mcsv1_UDAF::ReturnCode median::evaluate(mcsv1Context* context, static_any::any& return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h index d64792461..142be6ba8 100644 --- a/utils/udfsdk/median.h +++ b/utils/udfsdk/median.h @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. 
*/ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/ssq.cpp b/utils/udfsdk/ssq.cpp index 4d9ef7e10..20fdc33db 100644 --- a/utils/udfsdk/ssq.cpp +++ b/utils/udfsdk/ssq.cpp @@ -34,9 +34,9 @@ struct ssq_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -44,13 +44,13 @@ mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("ssq() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -81,8 +81,7 @@ mcsv1_UDAF::ReturnCode ssq::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; @@ -183,8 +182,7 @@ mcsv1_UDAF::ReturnCode ssq::evaluate(mcsv1Context* context, static_any::any& val return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode 
ssq::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h index 514c7a3f0..2cac61c2c 100644 --- a/utils/udfsdk/ssq.h +++ b/utils/udfsdk/ssq.h @@ -114,7 +114,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -147,8 +147,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -224,8 +223,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); protected: }; diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp index 981651c43..dc0277ccc 100644 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -490,6 +490,168 @@ extern "C" // return data->sumsq; return 0; } + +//======================================================================= + + /** + * regr_avgx connector stub + */ + struct regr_avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_avgx_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_avgx() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + 
data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x and y + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } + +//======================================================================= + + /** + * avgx connector stub. Exactly the same functionality as the + * built in avg() function. 
Use to test the performance of the + * API + */ + struct avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct avgx_data* data; + if (args->arg_count != 1) + { + strcpy(message,"avgx() requires one argument"); + return 1; + } + + if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x and y + struct avgx_data* data = (struct avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } } // vim:ts=4 sw=4: diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index 664b0e7de..fe1f3fd0e 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -204,8 +204,10 @@ Filters="*.c;*.C;*.cc;*.cpp;*.cp;*.cxx;*.c++;*.prg;*.pas;*.dpr;*.asm;*.s;*.bas;*.java;*.cs;*.sc;*.e;*.cob;*.html;*.rc;*.tcl;*.py;*.pl;*.d"> + + @@ -215,8 +217,10 @@ 
Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + + diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index f302c49cd..5cd5243c5 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -52,6 +52,7 @@ using namespace joblist; namespace windowfunction { + template boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) { @@ -142,7 +143,7 @@ template void WF_udaf::resetData() { getContext().getFunction()->reset(&getContext()); - fSet.clear(); + fDistinctSet.clear(); WindowFunctionType::resetData(); } @@ -150,8 +151,8 @@ template void WF_udaf::parseParms(const std::vector& parms) { bRespectNulls = true; - // parms[1]: respect null | ignore null - ConstantColumn* cc = dynamic_cast(parms[1].get()); + // The last parms: respect null | ignore null + ConstantColumn* cc = dynamic_cast(parms[parms.size()-1].get()); idbassert(cc != NULL); bool isNull = false; // dummy, harded coded bRespectNulls = (cc->getIntVal(fRow, isNull) > 0); @@ -167,52 +168,71 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) } mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - uint64_t colOut = fFieldIndex[0]; - uint64_t colIn = fFieldIndex[1]; - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. 
+ getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); + + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } for (int64_t i = b; i < e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; + bool bHasNull = false; fRow.setData(getPointer(fRowData->at(i))); // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; + uint32_t flags[getContext().getParameterCount()]; - if (fRow.isNullValue(colIn) == true) + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) { - continue; + if (!bRespectNulls) + { + bHasNull = true; + break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - flag |= mcsv1sdk::PARAM_IS_NULL; + T valIn; + getValue(colIn, valIn, &datum.dataType); + + // Check for distinct, if turned on. + // Currently, distinct only works for param 1 + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. - // TODO: when we impliment distinct, we need to revist this. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) + if (bHasNull) { continue; } - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - rc = getContext().getFunction()->dropValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED) @@ -442,59 +462,191 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) else if (fPrev <= e && fPrev > c) e = c; - uint64_t colIn = fFieldIndex[1]; + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. + getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } if (b <= c && c <= e) getContext().setContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); else getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); - + bool bHasNull = false; for (int64_t i = b; i <= e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; fRow.setData(getPointer(fRowData->at(i))); - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - if (fRow.isNullValue(colIn) == true) + // NULL flags + uint32_t flags[getContext().getParameterCount()]; + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + + // Turn on Null flags or skip based on respect nulls + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) + { + if (!bRespectNulls) + { + bHasNull = true; + 
break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; + } + + // MCOL-1201 Multi-Paramter calls + switch (datum.dataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + { + int64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::UDECIMAL: + { + uint64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + double valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + float valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[i] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } + } + // Skip if any value is NULL and respect nulls is off. + if (bHasNull) { continue; } - - flag |= mcsv1sdk::PARAM_IS_NULL; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) - { - continue; - } - - if (fDistinct) - fSet.insert(valIn); - - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - + getContext().setDataFlags(flags); + rc = getContext().getFunction()->nextValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index babb32565..f7a4c4b08 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -21,13 +21,35 @@ #ifndef UTILS_WF_UDAF_H #define UTILS_WF_UDAF_H -#include +#ifndef _MSC_VER +#include +#else +#include +#endif #include "windowfunctiontype.h" #include "mcsv1_udaf.h" namespace windowfunction { +// Hash classes for the distinct hashmap +class DistinctHasher +{ +public: + inline size_t operator()(const static_any::any& a) const + { + return a.getHash(); + } +}; + +class DistinctEqual +{ +public: + inline bool operator()(const static_any::any& lhs, static_any::any& rhs) const + { + return lhs == rhs; + } +}; // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF @@ -72,7 +94,8 @@ protected: bool fDistinct; bool bRespectNulls; // respect null | ignore null bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue. 
- std::set fSet; // To hold distinct values + // To hold distinct values + std::tr1::unordered_set fDistinctSet; static_any::any fValOut; // The return value public: diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index 950045899..4c5b4de32 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -492,10 +492,10 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) static uint64_t dateNull = joblist::DATENULL; static uint64_t datetimeNull = joblist::DATETIMENULL; static uint64_t timeNull = joblist::TIMENULL; - static uint64_t char1Null = joblist::CHAR1NULL; - static uint64_t char2Null = joblist::CHAR2NULL; - static uint64_t char4Null = joblist::CHAR4NULL; - static uint64_t char8Null = joblist::CHAR8NULL; +// static uint64_t char1Null = joblist::CHAR1NULL; +// static uint64_t char2Null = joblist::CHAR2NULL; +// static uint64_t char4Null = joblist::CHAR4NULL; +// static uint64_t char8Null = joblist::CHAR8NULL; static string stringNull(""); void* v = NULL; diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 7cd275021..71d0e1fbd 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1280,7 +1280,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -2025,10 +2025,10 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 0; k < colStructList.size(); k++) + for (size_t k = 0; k < colStructList.size(); k++) { // Skip the selected column 
- if (k == colId) + if (k == (size_t)colId) continue; Column expandCol; @@ -2583,7 +2583,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -3278,7 +3278,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; From c67ac7699e4c9080e79b18fee6a25d8956e628e2 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:53:49 -0500 Subject: [PATCH 12/19] MCOL-1201 Add test UDAF back in after rebase --- utils/udfsdk/avgx.cpp | 257 +++++++++++++++++++++++++++++++++++ utils/udfsdk/avgx.h | 99 ++++++++++++++ utils/udfsdk/regr_avgx.cpp | 270 +++++++++++++++++++++++++++++++++++++ utils/udfsdk/regr_avgx.h | 99 ++++++++++++++ 4 files changed, 725 insertions(+) create mode 100644 utils/udfsdk/avgx.cpp create mode 100644 utils/udfsdk/avgx.h create mode 100644 utils/udfsdk/regr_avgx.cpp create mode 100644 utils/udfsdk/regr_avgx.h diff --git a/utils/udfsdk/avgx.cpp b/utils/udfsdk/avgx.cpp new file mode 100644 index 000000000..887a8418e --- /dev/null +++ b/utils/udfsdk/avgx.cpp @@ -0,0 +1,257 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include +#include "avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 1) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with other than 1 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[0].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[0].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode avgx::reset(mcsv1Context* context) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_x = valsIn[0].columnData; + struct avgx_data* data = (struct 
avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct avgx_data* outData = (struct avgx_data*)context->getUserData()->data; + struct avgx_data* inData = (struct avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + + valOut = data->sum / (double)data->cnt; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_x = valsDropped[0].columnData; + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/avgx.h b/utils/udfsdk/avgx.h new file mode 100644 index 000000000..0569b6091 --- /dev/null +++ b/utils/udfsdk/avgx.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* avgx.h +***********************************************************************/ + +/** + * Columnstore interface for for the avgx function + * + * + * CREATE AGGREGATE FUNCTION avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_avgx +#define HEADER_avgx + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+ +// Return the avgx value of the dataset + +class avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + avgx() : mcsv1_UDAF() {}; + virtual ~avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_.h + diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/udfsdk/regr_avgx.cpp new file mode 100644 index 000000000..c7cc5b56e --- /dev/null +++ b/utils/udfsdk/regr_avgx.cpp @@ -0,0 +1,270 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include +#include +#include +#include "regr_avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct regr_avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode regr_avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 2) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with other than 2 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(regr_avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[1].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode regr_avgx::reset(mcsv1Context* context) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_y = valsIn[0].columnData; + static_any::any& valIn_x = valsIn[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (context->isParamNull(0) || context->isParamNull(1)) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct regr_avgx_data* outData = (struct regr_avgx_data*)context->getUserData()->data; + struct regr_avgx_data* inData = (struct regr_avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + + if (data->cnt == 0) + { + valOut = 0; + } + else + { + valOut = data->sum / (double)data->cnt; + } + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_y = valsDropped[0].columnData; + static_any::any& valIn_x = valsDropped[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/regr_avgx.h b/utils/udfsdk/regr_avgx.h new file mode 100644 index 000000000..f70f30d8c --- /dev/null +++ b/utils/udfsdk/regr_avgx.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* regr_avgx.h +***********************************************************************/ + +/** + * Columnstore interface for for the regr_avgx function + * + * + * CREATE AGGREGATE FUNCTION regr_avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_regr_avgx +#define HEADER_regr_avgx + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+ +// Return the regr_avgx value of the dataset + +class regr_avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + regr_avgx() : mcsv1_UDAF() {}; + virtual ~regr_avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_regr_avgx.h + From 06e9772310005c3f824652ae8fbc5dc87d6f84e6 Mon Sep 17 00:00:00 2001 From: David Hall Date: Mon, 14 May 2018 17:28:24 -0500 Subject: [PATCH 13/19] MCOL-1201 some fixes from testing --- dbcon/joblist/tupleaggregatestep.cpp | 229 ++++++++++++--------------- dbcon/mysql/ha_calpont_execplan.cpp | 1 - utils/common/common.vpj | 2 + utils/rowgroup/rowaggregation.cpp | 4 +- 4 files changed, 106 insertions(+), 130 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index ff490da5b..8f7755ad9 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -852,7 +852,6 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) if (ac->aggOp() == ROWAGG_UDAF) { UDAFColumn* udafc = dynamic_cast(ac); - if (udafc) { constAggDataVec.push_back( @@ -1097,8 +1096,9 @@ void TupleAggregateStep::prep1PhaseAggregate( vector functionVec; uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); + // For UDAF uint32_t projColsUDAFIndex = 0; - + UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function map avgFuncMap; @@ -1287,12 +1287,10 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { 
std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -1300,12 +1298,10 @@ void TupleAggregateStep::prep1PhaseAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, i)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -1474,8 +1470,6 @@ void TupleAggregateStep::prep1PhaseAggregate( throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } - pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); - // Return column oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(key); @@ -1677,8 +1671,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); // map key = column key, operation (enum), and UDAF pointer if UDAF. 
AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; set avgSet; + + // fOR udaf + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; uint32_t projColsUDAFIndex = 0; // for count column of average function @@ -1847,7 +1844,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; if (udafc) @@ -1857,12 +1854,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAgg)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -2142,6 +2137,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { + udafc = NULL; pUDAFFunc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); @@ -2150,10 +2146,21 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } if 
(find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -2473,26 +2480,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); } else { @@ -2904,7 +2892,10 @@ void TupleAggregateStep::prep2PhasesAggregate( vector > aggColVec; set avgSet; vector >& returnedColVec = jobInfo.returnedColVec; + // For UDAF uint32_t projColsUDAFIndex = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2947,7 +2938,6 @@ void TupleAggregateStep::prep2PhasesAggregate( uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns @@ -3084,12 +3074,10 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3098,10 +3086,9 @@ void 
TupleAggregateStep::prep2PhasesAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -3350,10 +3337,6 @@ void TupleAggregateStep::prep2PhasesAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { - // Keep a count of the parms after the first for any aggregate. - // These will be skipped and the count needs to be subtracted - // from where the aux column will be. - int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap; @@ -3369,6 +3352,8 @@ void TupleAggregateStep::prep2PhasesAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { uint32_t retKey = returnedColVec[i].first; @@ -3379,19 +3364,30 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_MULTI_PARM) { // Skip on UM: Extra parms for an aggregate have no work on the UM - ++multiParms; continue; } + // Is this a UDAF? use the function as part of the key. 
- - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; - + pUDAFFunc = NULL; + udafc = NULL; if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); @@ -3492,7 +3488,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep2PhasesAggregate: " << emsg << " oid=" @@ -3514,7 +3510,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3525,7 +3521,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.distinctColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3534,7 +3530,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + 
ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -3542,30 +3538,11 @@ void TupleAggregateStep::prep2PhasesAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3600,6 +3577,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnedColVec[i].second == AggregateColumn::AVG) avgFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // now fix the AVG function, locate the count(column) position @@ -3617,7 +3595,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size() - multiParms; + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3724,7 +3702,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector > aggColVec, aggNoDistColVec; set avgSet, avgDistSet; vector >& returnedColVec = jobInfo.returnedColVec; + // For UDAF uint32_t projColsUDAFIndex = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -3796,7 
+3777,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns @@ -3940,12 +3920,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3954,10 +3932,9 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -4201,32 +4178,33 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // associate the columns between the aggregate RGs on PM and UM without distinct aggregator // populated the returned columns { + int64_t multiParms = 0; + for (uint32_t idx = 0; idx < groupByPm.size(); idx++) { SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(idx, idx)); groupByUm.push_back(groupby); } - // Keep a count of the parms after the first for any aggregate. - // These will be skipped and the count needs to be subtracted - // from where the aux column will be. 
- int64_t multiParms = 0; for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) - { SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; - // UDAF support if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) { - // Multi-Parm is not used on the UM + // Skip on UM: Extra parms for an aggregate have no work on the UM ++multiParms; continue; } + if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); + if (!udafFuncCol) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + } funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, @@ -4273,6 +4251,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // These will be skipped and the count needs to be subtracted // from where the aux column will be. int64_t multiParms = 0; + projColsUDAFIndex = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. 
map avgFuncMap, avgDistFuncMap; @@ -4286,9 +4265,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { pUDAFFunc = NULL; + udafc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); @@ -4304,10 +4286,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -4436,7 +4429,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second - multiParms; + colUm = it->second; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); @@ -4460,7 +4453,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - colUm = it->second - multiParms; + colUm = it->second; if (aggOp == ROWAGG_SUM) { @@ -4528,7 +4521,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, 
args); cerr << "prep2PhasesDistinctAggregate: " << emsg << " oid=" @@ -4552,7 +4545,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -4561,7 +4554,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -4569,30 +4562,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. 
- funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4629,6 +4603,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -4646,7 +4621,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size() - multiParms; + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4706,7 +4681,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(5)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 9150d5393..c1706eee7 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4573,7 +4573,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) udafc->resultType(ct); } } - } catch (std::logic_error e) { diff --git a/utils/common/common.vpj b/utils/common/common.vpj index 69059884c..ea67e04ba 100755 --- a/utils/common/common.vpj +++ 
b/utils/common/common.vpj @@ -200,6 +200,7 @@ + @@ -208,6 +209,7 @@ Name="Header Files" Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index c1f5bbd63..043dcaac2 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -2015,13 +2015,13 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { - int32_t paramCount = fRGContext.getParameterCount(); + uint32_t paramCount = fRGContext.getParameterCount(); // The vector of parameters to be sent to the UDAF mcsv1sdk::ColumnDatum valsIn[paramCount]; uint32_t dataFlags[paramCount]; execplan::CalpontSystemCatalog::ColDataType colDataType; - for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + for (uint32_t i = 0; i < paramCount; ++i) { mcsv1sdk::ColumnDatum& datum = valsIn[i]; // Turn on NULL flags From c8c3b23e32b676d09604c39798167102b03df4c3 Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 15 May 2018 13:15:45 -0500 Subject: [PATCH 14/19] MCOL-1201 Modify docs. 
Fix group concat bug --- dbcon/mysql/ha_calpont_execplan.cpp | 1 + utils/udfsdk/docs/source/changelog.rst | 1 + .../docs/source/reference/ColumnDatum.rst | 6 ++-- .../docs/source/reference/MariaDBUDAF.rst | 2 +- .../udfsdk/docs/source/reference/UDAFMap.rst | 2 +- .../docs/source/reference/mcsv1Context.rst | 2 +- .../docs/source/reference/mcsv1_UDAF.rst | 36 ++++++++----------- utils/udfsdk/docs/source/usage/cmakelists.rst | 2 +- utils/udfsdk/docs/source/usage/compile.rst | 2 +- utils/udfsdk/docs/source/usage/headerfile.rst | 6 ++-- .../udfsdk/docs/source/usage/introduction.rst | 4 +-- utils/udfsdk/docs/source/usage/sourcefile.rst | 29 +++++++-------- utils/udfsdk/udfsdk.vpj | 33 +++++++++++++++++ 13 files changed, 75 insertions(+), 51 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index c1706eee7..4a86dc218 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4165,6 +4165,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) rowCol->columnVec(selCols); (dynamic_cast(ac))->orderCols(orderCols); parm.reset(rowCol); + ac->aggParms().push_back(parm); if (gc->str_separator()) { diff --git a/utils/udfsdk/docs/source/changelog.rst b/utils/udfsdk/docs/source/changelog.rst index fcd93d54c..1a7c749f9 100644 --- a/utils/udfsdk/docs/source/changelog.rst +++ b/utils/udfsdk/docs/source/changelog.rst @@ -5,4 +5,5 @@ Version History | Version | Date | Changes | +=========+============+=============================+ | 1.1.0α | 2017-08-25 | - First alpha release | +| 1.2.0α | 2016-05-18 | - Add multi parm support | +---------+------------+-----------------------------+ diff --git a/utils/udfsdk/docs/source/reference/ColumnDatum.rst b/utils/udfsdk/docs/source/reference/ColumnDatum.rst index dd1006363..5304a2953 100644 --- a/utils/udfsdk/docs/source/reference/ColumnDatum.rst +++ b/utils/udfsdk/docs/source/reference/ColumnDatum.rst @@ -1,3 +1,5 @@ +.. 
_ColumnDatum: + ColumnDatum =========== @@ -13,7 +15,7 @@ Example for int data: int myint = valIn.cast(); -For multi-paramter aggregations (not available in Columnstore 1.1), the colsIn vector of next_value() contains the ordered set of row parameters. +For multi-parameter aggregations (not available in Columnstore 1.1), the colsIn array of next_value() contains the ordered set of row parameters. For char, varchar, text, varbinary and blob types, columnData will be std::string. @@ -59,7 +61,7 @@ The provided values are: * - SMALLINT - A signed two byte integer * - DECIMAL - - A Columnstore Decimal value. For Columnstore 1.1, this is stored in the smallest integer type field that will hold the required precision. + - A Columnstore Decimal value. This is stored in the smallest integer type field that will hold the required precision. * - MEDINT - A signed four byte integer * - INT diff --git a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst index 1f6fa7acb..d031705d8 100644 --- a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst +++ b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst @@ -13,7 +13,7 @@ The library placed in mysql/lib is the name you use in the SQL CREATE AGGREGATE CREATE AGGREGATE FUNCTION ssq returns REAL soname 'libudf_mysql.so'; -Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures. If writing your function for other engines, you must handle allocation and de-alloaction in :ref:`function_init ` and :ref:`function_deinit ` +Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures in other engines.
If writing your function for other engines, you must handle allocation and de-allocation in :ref:`function_init ` and :ref:`function_deinit ` All of the MariaDB UDF and UDAF example functions are in a single source file named udfmysql.cpp and linked into libudf_mysql.so. diff --git a/utils/udfsdk/docs/source/reference/UDAFMap.rst b/utils/udfsdk/docs/source/reference/UDAFMap.rst index 48706bab3..d3cda63f4 100644 --- a/utils/udfsdk/docs/source/reference/UDAFMap.rst +++ b/utils/udfsdk/docs/source/reference/UDAFMap.rst @@ -3,7 +3,7 @@ UDAFMap ======= -The UDAFMap is where we tell the system about our function. For Columnstore 1.1, you must manually place your function into this map. +The UDAFMap is where we tell the system about our function. For Columnstore 1.2, you must manually place your function into this map. * open mcsv1_udaf.cpp * add your header to the #include list diff --git a/utils/udfsdk/docs/source/reference/mcsv1Context.rst b/utils/udfsdk/docs/source/reference/mcsv1Context.rst index 279220fb3..02adf57ab 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1Context.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1Context.rst @@ -150,7 +150,7 @@ Use these to determine the way your UDA(n)F was called .. c:function:: size_t getParameterCount() const; -:returns: the number of parameters to the function in the SQL query. Columnstore 1.1 only supports one parameter. +:returns: the number of parameters to the function in the SQL query. .. c:function:: bool isParamNull(int paramIdx); diff --git a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst index 73c8f6570..f75fe73fc 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst @@ -1,4 +1,4 @@ -.. _ mcsv1_udaf: +.. _mcsv1_udaf: mcsv1_UDAF ========== @@ -11,12 +11,14 @@ The base class has no data members. It is designed to be only a container for yo However, adding static const members makes sense. 
-For UDAF (not Wndow Functions) Aggregation takes place in three stages: +For UDAF (not Window Functions) Aggregation takes place in three stages: * Subaggregation on the PM. nextValue() * Consolodation on the UM. subevaluate() * Evaluation of the function on the UM. evaluate() +There are situations where the system makes a choice to perform all UDAF calculations on the UM. The presence of group_concat() in the query and certain joins can cause the optimizer to make this choice. + For Window Functions, all aggregation occurs on the UM, and thus the subevaluate step is skipped. There is an optional dropValue() function that may be added. * Aggregation on the UM. nextValue() @@ -80,17 +82,11 @@ Callback Methods .. _init: -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. - - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. +:param colTypes: A list of ColumnDatum structures. Use this to access the column types of the parameters. colTypes.columnData will be invalid. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -116,25 +112,23 @@ Callback Methods .. _nextvalue: -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. - +:param valsIn: an array representing the values to be added for each parameter for this row. 
+ :returns: ReturnCode::ERROR or ReturnCode::SUCCESS Use context->getUserData() and type cast it to your UserData type or Simple Data Model stuct. nextValue() is called for each Window movement that passes the WHERE and HAVING clauses. The context's UserData will contain values that have been sub-aggregated to this point for the group, partition or Window Frame. nextValue is called on the PM for aggregation and on the UM for Window Functions. - When used in an aggregate, the function may not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. + When used in an aggregate, the function should not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. - When used as a analytic function (Window Function), nextValue is call for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. + When used as an analytic function (Window Function), nextValue is called for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. - Since this is called for every row, it is important that this method be efficient. + Since this may be called for every row, it is important that this method be efficient. .. _subevaluate: @@ -172,13 +166,11 @@ Callback Methods .. _dropvalue: -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call -:param valsDropped: a vector representing the values to be dropped for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. 
+:param valsDropped: an array representing the values to be dropped for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS diff --git a/utils/udfsdk/docs/source/usage/cmakelists.rst b/utils/udfsdk/docs/source/usage/cmakelists.rst index 32a218459..a7ddacbaf 100644 --- a/utils/udfsdk/docs/source/usage/cmakelists.rst +++ b/utils/udfsdk/docs/source/usage/cmakelists.rst @@ -3,7 +3,7 @@ CMakeLists.txt ============== -For Columnstore 1.1, you compile your function by including it in the CMakeLists.txt file for the udfsdk. +For Columnstore 1.2, you compile your function by including it in the CMakeLists.txt file for the udfsdk. You need only add the new .cpp files to the udfsdk_LIB_SRCS target list:: diff --git a/utils/udfsdk/docs/source/usage/compile.rst b/utils/udfsdk/docs/source/usage/compile.rst index e6319e45b..b96af5d80 100644 --- a/utils/udfsdk/docs/source/usage/compile.rst +++ b/utils/udfsdk/docs/source/usage/compile.rst @@ -3,7 +3,7 @@ Compile ======= -To compile your function for Columnstore 1.1, simple recompile the udfsdk directory:: +To compile your function for Columnstore 1.2, simply recompile the udfsdk directory:: cd utils/usdsdk cmake . diff --git a/utils/udfsdk/docs/source/usage/headerfile.rst b/utils/udfsdk/docs/source/usage/headerfile.rst index 720acc5be..afb043e98 100644 --- a/utils/udfsdk/docs/source/usage/headerfile.rst +++ b/utils/udfsdk/docs/source/usage/headerfile.rst @@ -5,7 +5,7 @@ Header file Usually, each UDA(n)F function will have one .h and one .cpp file plus code for the mariadb UDAF plugin which may or may not be in a separate file. It is acceptable to put a set of related functions in the same files or use separate files for each. -The easiest way to create these files is to copy them an example closest to the type of function you intend to create. +The easiest way to create these files is to copy them from an example closest to the type of function you intend to create. 
Your header file must have a class defined that will implement your function. This class must be derived from mcsv1_UDAF and be in the mcsv1sdk namespace. The following examples use the "allnull" UDAF. @@ -29,9 +29,9 @@ allnull uses the Simple Data Model. See :ref:`complexdatamodel` to see how that allnull() : mcsv1_UDAF(){}; virtual ~allnull(){}; - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); virtual ReturnCode reset(mcsv1Context* context); - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn); virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); }; diff --git a/utils/udfsdk/docs/source/usage/introduction.rst b/utils/udfsdk/docs/source/usage/introduction.rst index 6b3544a1e..19c612caa 100644 --- a/utils/udfsdk/docs/source/usage/introduction.rst +++ b/utils/udfsdk/docs/source/usage/introduction.rst @@ -3,7 +3,7 @@ mcsv1_udaf Introduction mcsv1_udaf is a C++ API for writing User Defined Aggregate Functions (UDAF) and User Defined Analytic Functions (UDAnF) for the MariaDB Columstore engine. -In Columnstore 1.1.0, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. +In Columnstore 1.2, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. The API has a number of features. The general theme is, there is a class that represents the function, there is a context under which the function operates, and there is a data store for intermediate values. @@ -18,5 +18,5 @@ The steps required to create a function are: * :ref:`Compile udfsdk `. * :ref:`Copy the compiled libraries ` to the working directories. 
-In 1.1.0, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. +In 1.2, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. diff --git a/utils/udfsdk/docs/source/usage/sourcefile.rst b/utils/udfsdk/docs/source/usage/sourcefile.rst index b7ed38a32..5c43f29e4 100644 --- a/utils/udfsdk/docs/source/usage/sourcefile.rst +++ b/utils/udfsdk/docs/source/usage/sourcefile.rst @@ -34,21 +34,17 @@ Or, if using the :ref:`complexdatamodel`, type cast the UserData to your UserDat init() ------ -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. +:param colTypes: A list of the ColumnDatum used to access column types of the parameters. In init(), the columnData member is invalid. - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - see :ref:`ColDataTypes `. In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. + see :ref:`ColumnDatum`. In Columnstore 1.2, an arbitrary number of parameters is supported. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS -The init() method is where you sanity check the input, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. +The init() method is where you sanity check the input datatypes, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. init() is the exception to type casting the UserData member of context. 
UserData has not been created when init() is called, so you shouldn't use it here. @@ -60,13 +56,14 @@ If you're using :ref:`simpledatamodel`, you need to set the size of the structur .. rubric:: Check parameter count and type -Each function expects a certain number of columns to entered as parameters in the SQL query. For columnstore 1.1, the number of parameters is limited to one. +Each function expects a certain number of columns to be entered as parameters in the SQL query. It is possible to create a UDAF that accepts a variable number of parameters. You can discover which ones were actually used in init(), and modify your function's behavior accordingly. -colTypes is a vector of each parameter name and type. The name is the colum name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your functions behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +colTypes is an array of ColumnDatum from which can be gleaned the type and name. The name is the column name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your function's behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +The actual number of parameters passed can be obtained from context->getParameterCount(). :: - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -84,7 +81,7 @@ When you create your function using the SQL CREATE FUNCTION command, you must in .. 
rubric:: Set width and scale -If you have secial requirements, especially if you might be dealing with decimal types:: +If you have special requirements, especially if you might be dealing with decimal types:: context->setColWidth(8); context->setScale(context->getScale()*2); @@ -117,13 +114,11 @@ This function may be called multiple times from both the UM and the PM. Make no nextValue() ----------- -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. +:param valsIn: an array representing the values to be added for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -208,7 +203,7 @@ For AVG, you might see:: dropValue --------- -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index fe1f3fd0e..3d3ac39ca 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -238,5 +238,38 @@ N="Makefile" Type="Makefile"/> + + + + + + + + + + + + + + + + + + + + + + + + + + + From ec3a3846c3d66d79170a0b49244c435bc02b21f2 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:50:10 -0500 Subject: [PATCH 15/19] MCOL-1201 manual rebase with develop. 
Obsoletes branch MCOL-1201 --- dbcon/joblist/tupleaggregatestep.cpp | 234 +++++--- dbcon/mysql/ha_calpont_execplan.cpp | 778 ++++++++++++++------------- utils/rowgroup/rowaggregation.cpp | 204 ++++--- 3 files changed, 707 insertions(+), 509 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 8f7755ad9..be0e2009d 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -1097,7 +1097,8 @@ void TupleAggregateStep::prep1PhaseAggregate( uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); // For UDAF - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function @@ -1139,6 +1140,7 @@ void TupleAggregateStep::prep1PhaseAggregate( // populate the aggregate rowgroup AGG_MAP aggFuncMap; + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -1156,8 +1158,9 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(ti.dtype); widthAgg.push_back(ti.width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - aggOp, stats, 0, i, jobInfo.cntStarPos)); + aggOp, stats, 0, outIdx, jobInfo.cntStarPos)); functionVec.push_back(funct); + ++outIdx; continue; } @@ -1173,9 +1176,10 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(ti.dtype); widthAgg.push_back(width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - aggOp, stats, 0, i, -1)); + aggOp, stats, 0, outIdx, -1)); functionVec.push_back(funct); + ++outIdx; continue; } @@ -1221,16 +1225,17 @@ void TupleAggregateStep::prep1PhaseAggregate( widthAgg.push_back(width[colProj]); if (groupBy[it->second]->fOutputColumnIndex == (uint32_t) - 1) - groupBy[it->second]->fOutputColumnIndex = i; + groupBy[it->second]->fOutputColumnIndex = outIdx; else functionVec.push_back(SP_ROWAGG_FUNC_t( new RowAggFunctionCol( ROWAGG_DUP_FUNCT, 
ROWAGG_FUNCT_UNDEFINE, -1, - i, + outIdx, groupBy[it->second]->fOutputColumnIndex))); + ++outIdx; continue; } else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), key) != @@ -1243,6 +1248,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(ti.precision); typeAgg.push_back(ti.dtype); widthAgg.push_back(ti.width); + ++outIdx; continue; } else if (jobInfo.groupConcatInfo.columns().find(key) != @@ -1255,6 +1261,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + ++outIdx; continue; } else if (jobInfo.windowSet.find(key) != jobInfo.windowSet.end()) @@ -1266,6 +1273,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + ++outIdx; continue; } else @@ -1286,16 +1294,16 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); // Create a RowAggFunctionCol (UDAF subtype) with the context. 
- funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, outIdx)); break; } } @@ -1306,7 +1314,7 @@ void TupleAggregateStep::prep1PhaseAggregate( } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, outIdx)); } functionVec.push_back(funct); @@ -1477,6 +1485,14 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(udafFuncCol->fUDAFContext.getPrecision()); typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -1488,6 +1504,13 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -1520,6 +1543,11 @@ void TupleAggregateStep::prep1PhaseAggregate( { aggFuncMap.insert(make_pair(boost::make_tuple(key, aggOp, pUDAFFunc), funct->fOutputColumnIndex)); } + + if (aggOp != ROWAGG_MULTI_PARM) + { + ++outIdx; + } } // now fix the AVG function, locate the count(column) position @@ -1671,12 +1699,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); // map key = column key, operation (enum), and UDAF pointer if UDAF. 
AGG_MAP aggFuncMap; - set avgSet; +// set avgSet; + list multiParmIndexes; // fOR udaf UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; // for count column of average function map avgFuncMap, avgDistFuncMap; @@ -1825,9 +1855,9 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // skip sum / count(column) if avg is also selected - if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && - (avgSet.find(aggKey) != avgSet.end())) - continue; +// if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && +// (avgSet.find(aggKey) != avgSet.end())) +// continue; if (aggOp == ROWAGG_DISTINCT_SUM || aggOp == ROWAGG_DISTINCT_AVG || @@ -1840,12 +1870,12 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { @@ -2063,7 +2093,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); ++colAgg; - // UDAF Dummy holder for UserData struct + // Column for index of UDAF UserData struct oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(aggKey); scaleAgg.push_back(0); @@ -2071,6 +2101,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(CalpontSystemCatalog::UBIGINT); widthAgg.push_back(sizeof(uint64_t)); funct->fAuxColumnIndex = colAgg++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -2082,7 
+2120,15 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(widthProj[colProj]); + multiParmIndexes.push_back(colAgg); ++colAgg; + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -2122,7 +2168,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. AGG_MAP aggDupFuncMap; - pUDAFFunc = NULL; + projColsUDAFIdx = 0; + int64_t multiParms = 0; // copy over the groupby vector // update the outputColumnIndex if returned @@ -2133,8 +2180,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - projColsUDAFIndex = 0; // locate the return column position in aggregated rowgroup + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { udafc = NULL; @@ -2144,23 +2191,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colAgg = -1; - if (aggOp == ROWAGG_UDAF) + if (aggOp == ROWAGG_MULTI_PARM) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) - { - udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - break; - } - } - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); - } + // Skip on final agg.: Extra parms for an aggregate have no work there. 
+ ++multiParms; + continue; } if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -2188,6 +2223,25 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } } + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } + } + switch (aggOp) { case ROWAGG_DISTINCT_AVG: @@ -2438,7 +2492,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep1PhaseDistinctAggregate: " << emsg << " oid=" @@ -2462,7 +2516,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -2472,7 +2526,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (dupGroupbyIndex != -1) functionVec2.push_back(SP_ROWAGG_FUNC_t( new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -2480,11 +2534,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, outIdx)); 
} else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -2521,6 +2575,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -2538,7 +2593,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -2753,6 +2808,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. 
+ int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -2760,6 +2820,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[i].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } if (returnedColVec[k].first != distinctColKey) continue; @@ -2780,7 +2845,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -2799,9 +2864,15 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } // search non-distinct functions in functionVec vector::iterator it = functionVec2.begin(); @@ -2817,7 +2888,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if ((f->fOutputColumnIndex == k) && @@ -2839,7 +2910,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -2893,7 +2964,8 @@ void TupleAggregateStep::prep2PhasesAggregate( set avgSet; vector >& returnedColVec = jobInfo.returnedColVec; // For UDAF - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc 
= NULL; @@ -3073,11 +3145,11 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3305,6 +3377,14 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); widthAggPm.push_back(bigUintWidth); funct->fAuxColumnIndex = colAggPm++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -3317,6 +3397,13 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); colAggPm++; + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -3342,7 +3429,7 @@ void TupleAggregateStep::prep2PhasesAggregate( map avgFuncMap; AGG_MAP aggDupFuncMap; - projColsUDAFIndex = 0; + projColsUDAFIdx = 0; // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3372,12 +3459,12 @@ void TupleAggregateStep::prep2PhasesAggregate( udafc = NULL; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ 
-3703,7 +3790,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( set avgSet, avgDistSet; vector >& returnedColVec = jobInfo.returnedColVec; // For UDAF - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; @@ -3919,11 +4007,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -4147,6 +4235,14 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); widthAggPm.push_back(sizeof(uint64_t)); funct->fAuxColumnIndex = colAggPm++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -4160,6 +4256,13 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( widthAggPm.push_back(width[colProj]); multiParmIndexes.push_back(colAggPm); colAggPm++; + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -4208,9 +4311,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fOutputColumnIndex, + udafFuncCol->fOutputColumnIndex-multiParms, udafFuncCol->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); + pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); } else { @@ 
-4218,9 +4322,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fAggFunction, funcPm->fStatsFunction, funcPm->fOutputColumnIndex, - funcPm->fOutputColumnIndex, + funcPm->fOutputColumnIndex-multiParms, funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); + pUDAFFunc = NULL; } } @@ -4251,7 +4356,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // These will be skipped and the count needs to be subtracted // from where the aux column will be. int64_t multiParms = 0; - projColsUDAFIndex = 0; + projColsUDAFIdx = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4286,11 +4391,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -4436,6 +4541,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( precisionAggDist.push_back(precisionAggUm[colUm]); typeAggDist.push_back(typeAggUm[colUm]); widthAggDist.push_back(widthAggUm[colUm]); + colUm -= multiParms; } // not a direct hit -- a returned column is not already in the RG from PMs @@ -4472,8 +4578,16 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(retKey); scaleAggDist.push_back(0); - precisionAggDist.push_back(19); - typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + if (isUnsigned(typeAggUm[colUm])) + { + precisionAggDist.push_back(20); + typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + } + else + { + precisionAggDist.push_back(19); + typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + } 
widthAggDist.push_back(bigIntWidth); } } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 4a86dc218..d030d1855 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4097,426 +4097,429 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) try { - // special parsing for group_concat - if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - Item_func_group_concat* gc = (Item_func_group_concat*)isp; + // special parsing for group_concat + if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + Item_func_group_concat* gc = (Item_func_group_concat*)isp; vector orderCols; - RowColumn* rowCol = new RowColumn(); + RowColumn* rowCol = new RowColumn(); vector selCols; - uint32_t select_ctn = gc->count_field(); - ReturnedColumn* rc = NULL; + uint32_t select_ctn = gc->count_field(); + ReturnedColumn* rc = NULL; - for (uint32_t i = 0; i < select_ctn; i++) + for (uint32_t i = 0; i < select_ctn; i++) + { + rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); + + if (!rc || gwi.fatalParseError) + { + if (ac) + delete ac; + return NULL; + } + + selCols.push_back(SRCP(rc)); + } + + ORDER** order_item, **end; + + for (order_item = gc->get_order(), + end = order_item + gc->order_field(); order_item < end; + order_item++) + { + Item* ord_col = *(*order_item)->item; + + if (ord_col->type() == Item::INT_ITEM) { - rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); + Item_int* id = (Item_int*)ord_col; + + if (id->val_int() > (int)selCols.size()) + { + gwi.fatalParseError = true; + if (ac) + delete ac; + return NULL; + } + + rc = selCols[id->val_int() - 1]->clone(); + rc->orderPos(id->val_int() - 1); + } + else + { + rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); if (!rc || gwi.fatalParseError) { - if (ac) - delete ac; + if (ac) + delete ac; return NULL; } - - selCols.push_back(SRCP(rc)); } - ORDER** order_item, **end; + // 10.2 TODO: direction is 
now a tri-state flag + rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); + orderCols.push_back(SRCP(rc)); + } - for (order_item = gc->get_order(), - end = order_item + gc->order_field(); order_item < end; - order_item++) - { - Item* ord_col = *(*order_item)->item; - - if (ord_col->type() == Item::INT_ITEM) - { - Item_int* id = (Item_int*)ord_col; - - if (id->val_int() > (int)selCols.size()) - { - gwi.fatalParseError = true; - if (ac) - delete ac; - return NULL; - } - - rc = selCols[id->val_int() - 1]->clone(); - rc->orderPos(id->val_int() - 1); - } - else - { - rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); - - if (!rc || gwi.fatalParseError) - { - if (ac) - delete ac; - return NULL; - } - } - - // 10.2 TODO: direction is now a tri-state flag - rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); - orderCols.push_back(SRCP(rc)); - } - - rowCol->columnVec(selCols); - (dynamic_cast(ac))->orderCols(orderCols); - parm.reset(rowCol); + rowCol->columnVec(selCols); + (dynamic_cast(ac))->orderCols(orderCols); + parm.reset(rowCol); ac->aggParms().push_back(parm); - if (gc->str_separator()) - { - string separator; - separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); - (dynamic_cast(ac))->separator(separator); - } - } - else + if (gc->str_separator()) { - for (uint32_t i = 0; i < isp->argument_count(); i++) + string separator; + separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); + (dynamic_cast(ac))->separator(separator); + } + } + else + { + for (uint32_t i = 0; i < isp->argument_count(); i++) + { + Item* sfitemp = sfitempp[i]; + Item::Type sfitype = sfitemp->type(); + + switch (sfitype) { - Item* sfitemp = sfitempp[i]; - Item::Type sfitype = sfitemp->type(); - - switch (sfitype) + case Item::FIELD_ITEM: { - case Item::FIELD_ITEM: - { - Item_field* ifp = reinterpret_cast(sfitemp); - SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + Item_field* ifp = 
reinterpret_cast(sfitemp); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi); - if (!sc) - { - gwi.fatalParseError = true; - break; - } - - parm.reset(sc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); - TABLE_LIST* tmp = (ifp->cached_table ? ifp->cached_table : 0); - gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); - break; - } - - case Item::INT_ITEM: - case Item::STRING_ITEM: - case Item::REAL_ITEM: - case Item::DECIMAL_ITEM: - { - // treat as count(*) - if (ac->aggOp() == AggregateColumn::COUNT) - ac->aggOp(AggregateColumn::COUNT_ASTERISK); - - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::NULL_ITEM: - { - parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::FUNC_ITEM: - { - Item_func* ifp = (Item_func*)sfitemp; - ReturnedColumn* rc = 0; - - // check count(1+1) case - vector tmpVec; - uint16_t parseInfo = 0; - parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); - - if (parseInfo & SUB_BIT) - { - gwi.fatalParseError = true; - break; - } - else if (!gwi.fatalParseError && - !(parseInfo & AGG_BIT) && - !(parseInfo & AF_BIT) && - tmpVec.size() == 0) - { - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - FunctionColumn* fc = dynamic_cast(rc); - - if ((fc && fc->functionParms().empty()) || !fc) - { - //ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (dynamic_cast(rc)) - { - //@bug5229. handle constant function on aggregate argument - ac->constCol(SRCP(rc)); - break; - } - } - } - - // MySQL carelessly allows correlated aggregate function on the WHERE clause. - // Here is the work around to deal with that inconsistence. 
- // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; - ClauseType clauseType = gwi.clauseType; - - if (gwi.clauseType == WHERE) - gwi.clauseType = HAVING; - - // @bug 3603. for cases like max(rand()). try to build function first. - if (!rc) - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - - parm.reset(rc); - gwi.clauseType = clauseType; - - if (gwi.fatalParseError) - break; - - break; - } - - case Item::REF_ITEM: - { - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (rc) - { - parm.reset(rc); - break; - } - } - - default: + if (!sc) { gwi.fatalParseError = true; - //gwi.parseErrorText = "Non-supported Item in Aggregate function"; + break; + } + + parm.reset(sc); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); + TABLE_LIST* tmp = (ifp->cached_table ? ifp->cached_table : 0); + gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); + break; + } + + case Item::INT_ITEM: + case Item::STRING_ITEM: + case Item::REAL_ITEM: + case Item::DECIMAL_ITEM: + { + // treat as count(*) + if (ac->aggOp() == AggregateColumn::COUNT) + ac->aggOp(AggregateColumn::COUNT_ASTERISK); + parm.reset(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError)); + ac->constCol(parm); + break; + } + + case Item::NULL_ITEM: + { + parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::FUNC_ITEM: + { + Item_func* ifp = (Item_func*)sfitemp; + ReturnedColumn* rc = 0; + + // check count(1+1) case + vector tmpVec; + uint16_t parseInfo = 0; + parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); + + if (parseInfo & SUB_BIT) + { + gwi.fatalParseError = true; + break; + } + else if (!gwi.fatalParseError && + !(parseInfo & AGG_BIT) && + !(parseInfo & AF_BIT) && + tmpVec.size() == 0) + { + rc = 
buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + FunctionColumn* fc = dynamic_cast(rc); + + if ((fc && fc->functionParms().empty()) || !fc) + { + //ac->aggOp(AggregateColumn::COUNT_ASTERISK); + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (dynamic_cast(rc)) + { + //@bug5229. handle constant function on aggregate argument + ac->constCol(SRCP(rc)); + break; + } + } + } + + // MySQL carelessly allows correlated aggregate function on the WHERE clause. + // Here is the work around to deal with that inconsistence. + // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; + ClauseType clauseType = gwi.clauseType; + + if (gwi.clauseType == WHERE) + gwi.clauseType = HAVING; + + // @bug 3603. for cases like max(rand()). try to build function first. + if (!rc) + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + + parm.reset(rc); + gwi.clauseType = clauseType; + + if (gwi.fatalParseError) + break; + + break; + } + + case Item::REF_ITEM: + { + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (rc) + { + parm.reset(rc); + break; } } - if (gwi.fatalParseError) + default: { - if (gwi.parseErrorText.empty()) - { - Message::Args args; + gwi.fatalParseError = true; + //gwi.parseErrorText = "Non-supported Item in Aggregate function"; + } + } - if (item->name) - args.add(item->name); - else - args.add(""); + if (gwi.fatalParseError) + { + if (gwi.parseErrorText.empty()) + { + Message::Args args; - gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); - } + if (item->name) + args.add(item->name); + else + args.add(""); + + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); + } if (ac) delete ac; - return NULL; - } + return NULL; + } if (parm) { // MCOL-1201 multi-argument aggregate ac->aggParms().push_back(parm); } - } } + } // Get result type // Modified for MCOL-1201 multi-argument aggregate if 
(ac->aggParms().size() > 0) - { + { // These are all one parm functions, so we can safely // use the first parm for result type. parm = ac->aggParms()[0]; - if (isp->sum_func() == Item_sum::AVG_FUNC || - isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) + if (isp->sum_func() == Item_sum::AVG_FUNC || + isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) { - CalpontSystemCatalog::ColType ct = parm->resultType(); + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::DECIMAL; + ct.colWidth = 8; + ct.scale += 4; + break; - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::DECIMAL; - ct.colWidth = 8; - ct.scale += 4; - break; +#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; +#endif - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: 
- case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; - #endif - - default: - break; - } - - ac->resultType(ct); + default: + break; } - else if (isp->sum_func() == Item_sum::COUNT_FUNC || - isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::COUNT_FUNC || + isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = parm->resultType().scale; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_FUNC || + isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = parm->resultType().scale; - ac->resultType(ct); + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + ct.colDataType = CalpontSystemCatalog::BIGINT; + + // no break, let fall through + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + ct.colWidth = 8; + break; + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::UBIGINT; + ct.colWidth = 8; + break; + +#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; +#endif + + default: + break; } - else if (isp->sum_func() == Item_sum::SUM_FUNC || - 
isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - ct.colDataType = CalpontSystemCatalog::BIGINT; - - // no break, let fall through - - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - ct.colWidth = 8; - break; - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::UBIGINT; - ct.colWidth = 8; - break; - - #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; - #endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::STD_FUNC || - isp->sum_func() == Item_sum::VARIANCE_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - ct.scale = 0; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = 0; - ct.precision = -16; // borrowed to indicate skip null value check on connector - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - //Item_func_group_concat* gc = (Item_func_group_concat*)isp; - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::VARCHAR; - ct.colWidth = isp->max_length; - ct.precision = 0; - ac->resultType(ct); - } - else - { - // UDAF result type will be set below. 
- ac->resultType(parm->resultType()); - } + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::STD_FUNC || + isp->sum_func() == Item_sum::VARIANCE_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + ct.scale = 0; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = 0; + ct.precision = -16; // borrowed to indicate skip null value check on connector + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + //Item_func_group_concat* gc = (Item_func_group_concat*)isp; + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::VARCHAR; + ct.colWidth = isp->max_length; + ct.precision = 0; + ac->resultType(ct); } else { - ac->resultType(colType_MysqlToIDB(isp)); + // UDAF result type will be set below. + ac->resultType(parm->resultType()); } + } + else + { + ac->resultType(colType_MysqlToIDB(isp)); + } - // adjust decimal result type according to internalDecimalScale - if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) + // adjust decimal result type according to internalDecimalScale + if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) + { + CalpontSystemCatalog::ColType ct = ac->resultType(); + ct.scale = gwi.internalDecimalScale; + ac->resultType(ct); + } + + // check for same aggregate on the select list + ac->expressionId(ci->expressionId++); + + if (gwi.clauseType != SELECT) + { + for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) { - CalpontSystemCatalog::ColType ct = ac->resultType(); - ct.scale = gwi.internalDecimalScale; - ac->resultType(ct); + if (*ac == gwi.returnedCols[i].get()) + ac->expressionId(gwi.returnedCols[i]->expressionId()); } + } - // check for same aggregate on the select list 
- ac->expressionId(ci->expressionId++); - - if (gwi.clauseType != SELECT) + // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will + // be applied in ExeMgr. When the ExeMgr fix is available, this checking + // will be taken out. + if (isp->sum_func() != Item_sum::UDF_SUM_FUNC) { - for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) - { - if (*ac == gwi.returnedCols[i].get()) - ac->expressionId(gwi.returnedCols[i]->expressionId()); - } + if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "No project column found for aggregate function"; + if (ac) + delete ac; + return NULL; + } + else if (ac->constCol()) + { + gwi.count_asterisk_list.push_back(ac); + } } - // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will - // be applied in ExeMgr. When the ExeMgr fix is available, this checking - // will be taken out. - if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) - { - gwi.fatalParseError = true; - gwi.parseErrorText = "No project column found for aggregate function"; - if (ac) - delete ac; - return NULL; - } - else if (ac->constCol()) - { - gwi.count_asterisk_list.push_back(ac); - } - - // For UDAF, populate the context and call the UDAF init() function. + // For UDAF, populate the context and call the UDAF init() function. // The return type is (should be) set in context by init(). 
- if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + { + UDAFColumn* udafc = dynamic_cast(ac); + + if (udafc) { - UDAFColumn* udafc = dynamic_cast(ac); + mcsv1Context& context = udafc->getContext(); + context.setName(isp->func_name()); - if (udafc) - { - mcsv1Context& context = udafc->getContext(); - context.setName(isp->func_name()); - - // Set up the return type defaults for the call to init() - context.setResultType(udafc->resultType().colDataType); - context.setColWidth(udafc->resultType().colWidth); - context.setScale(udafc->resultType().scale); - context.setPrecision(udafc->resultType().precision); + // Set up the return type defaults for the call to init() + context.setResultType(udafc->resultType().colDataType); + context.setColWidth(udafc->resultType().colWidth); + context.setScale(udafc->resultType().scale); + context.setPrecision(udafc->resultType().precision); context.setParamCount(udafc->aggParms().size()); ColumnDatum colType; @@ -4533,7 +4536,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) colTypes[i] = colType; } - // Call the user supplied init() + // Call the user supplied init() mcsv1sdk::mcsv1_UDAF* udaf = context.getFunction(); if (!udaf) { @@ -4544,37 +4547,37 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) return NULL; } if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR) - { - gwi.fatalParseError = true; - gwi.parseErrorText = udafc->getContext().getErrorMessage(); + { + gwi.fatalParseError = true; + gwi.parseErrorText = udafc->getContext().getErrorMessage(); if (ac) delete ac; - return NULL; - } + return NULL; + } // UDAF_OVER_REQUIRED means that this function is for Window // Function only. Reject it here in aggregate land. 
- if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) - { - gwi.fatalParseError = true; - gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, - context.getName()); + if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, + context.getName()); if (ac) delete ac; - return NULL; - } - - // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; - ct.colDataType = context.getResultType(); - ct.colWidth = context.getColWidth(); - ct.scale = context.getScale(); - ct.precision = context.getPrecision(); - udafc->resultType(ct); + return NULL; } + + // Set the return type as set in init() + CalpontSystemCatalog::ColType ct; + ct.colDataType = context.getResultType(); + ct.colWidth = context.getColWidth(); + ct.scale = context.getScale(); + ct.precision = context.getPrecision(); + udafc->resultType(ct); } } + } catch (std::logic_error e) { gwi.fatalParseError = true; @@ -4744,6 +4747,7 @@ void gp_walk(const Item* item, void* arg) if (isp) { + // @bug 3669. 
trim trailing spaces for the compare value if (isp->result_type() == STRING_RESULT) { String val, *str = isp->val_str(&val); @@ -4754,7 +4758,10 @@ void gp_walk(const Item* item, void* arg) cval.assign(str->ptr(), str->length()); } + size_t spos = cval.find_last_not_of(" "); + if (spos != string::npos) + cval = cval.substr(0, spos + 1); gwip->rcWorkStack.push(new ConstantColumn(cval)); break; @@ -7908,8 +7915,15 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i setError(gwi.thd, ER_INTERNAL_ERROR, gwi.parseErrorText, gwi); return ER_CHECK_NOT_IMPLEMENTED; } - - (*coliter)->aggParms().push_back(minSc); + // Replace the last (presumably constant) object with minSc + if ((*coliter)->aggParms().empty()) + { + (*coliter)->aggParms().push_back(minSc); + } + else + { + (*coliter)->aggParms()[0] = minSc; + } } std::vector::iterator funciter; @@ -8075,9 +8089,9 @@ int cp_get_group_plan(THD* thd, SCSEP& csep, cal_impl_if::cal_group_info& gi) gwi.thd = thd; int status = getGroupPlan(gwi, select_lex, csep, gi); - cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; - cerr << *csep << endl ; - cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; +// cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; +// cerr << *csep << endl ; +// cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; if (status > 0) return ER_INTERNAL_ERROR; diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 043dcaac2..bead74aff 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -1723,17 +1723,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(pFunctionCol.get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF, i); - } - else - { - throw logic_error("(3)A UDAF function is called but there's no 
RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colOut + 1, i); break; } @@ -2012,31 +2002,60 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu fRow.setLongDoubleField(fRow.getLongDoubleField(colAux + 1) + valIn * valIn, colAux + 1); } -void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) +void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, + int64_t colAux, uint64_t& funcColsIdx) { uint32_t paramCount = fRGContext.getParameterCount(); // The vector of parameters to be sent to the UDAF mcsv1sdk::ColumnDatum valsIn[paramCount]; uint32_t dataFlags[paramCount]; - + ConstantColumn* cc; + bool bIsNull = false; execplan::CalpontSystemCatalog::ColDataType colDataType; for (uint32_t i = 0; i < paramCount; ++i) { + // If UDAF_IGNORE_NULLS is on, bIsNull gets set the first time + // we find a null. We still need to eat the rest of the parameters + // to sync updateEntry + if (bIsNull) + { + ++funcColsIdx; + continue; + } + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; // Turn on NULL flags dataFlags[i] = 0; - if (isNull(&fRowGroupIn, rowIn, colIn) == true) + + // If this particular parameter is a constant, then we need + // to acces the constant value rather than a row value. 
+ cc = NULL; + if (pFunctionCol->fpConstCol) + { + cc = dynamic_cast(pFunctionCol->fpConstCol.get()); + } + + if ((cc && cc->type() == ConstantColumn::NULLDATA) + || (!cc && isNull(&fRowGroupIn, rowIn, colIn) == true)) { if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { - return; + bIsNull = true; + ++funcColsIdx; + continue; } dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; } - - colDataType = fRowGroupIn.getColTypes()[colIn]; - if (!fRGContext.isParamNull(i)) + + if (cc) + { + colDataType = cc->resultType().colDataType; + } + else + { + colDataType = fRowGroupIn.getColTypes()[colIn]; + } + if (!(dataFlags[i] & mcsv1sdk::PARAM_IS_NULL)) { switch (colDataType) { @@ -2045,13 +2064,38 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::MEDINT: case execplan::CalpontSystemCatalog::INT: case execplan::CalpontSystemCatalog::BIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + if (cc) + { + datum.columnData = cc->getIntVal(const_cast(rowIn), bIsNull); + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + } + break; + } case execplan::CalpontSystemCatalog::DECIMAL: case execplan::CalpontSystemCatalog::UDECIMAL: { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - datum.scale = fRowGroupIn.getScale()[colIn]; - datum.precision = fRowGroupIn.getPrecision()[colIn]; + datum.dataType = colDataType; + if (cc) + { + datum.columnData = cc->getDecimalVal(const_cast(rowIn), bIsNull).value; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + } 
break; } @@ -2062,7 +2106,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::UBIGINT: { datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); + if (cc) + { + datum.columnData = cc->getUintVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } break; } @@ -2070,7 +2121,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::UDOUBLE: { datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; - datum.columnData = rowIn.getDoubleField(colIn); + if (cc) + { + datum.columnData = cc->getDoubleVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getDoubleField(colIn); + } break; } @@ -2078,22 +2136,55 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::UFLOAT: { datum.dataType = execplan::CalpontSystemCatalog::FLOAT; - datum.columnData = rowIn.getFloatField(colIn); + if (cc) + { + datum.columnData = cc->getFloatVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getFloatField(colIn); + } break; } case execplan::CalpontSystemCatalog::DATE: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + if (cc) + { + datum.columnData = cc->getDateIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } + break; + } case execplan::CalpontSystemCatalog::DATETIME: { datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); + if (cc) + { + datum.columnData = cc->getDatetimeIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } break; } case execplan::CalpontSystemCatalog::TIME: { datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); + if (cc) 
+ { + datum.columnData = cc->getTimeIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getIntField(colIn); + } break; } @@ -2105,7 +2196,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::BLOB: { datum.dataType = colDataType; - datum.columnData = rowIn.getStringField(colIn); + if (cc) + { + datum.columnData = cc->getStrVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getStringField(colIn); + } break; } @@ -2147,6 +2245,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColsIdx].get()); rowUDAF->bInterrupted = true; throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } @@ -2443,17 +2542,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); - } - else - { - throw logic_error("(5)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } @@ -3991,17 +4080,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); - } - else - { - throw logic_error("(6)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } @@ -4199,20 +4278,20 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // colAux(in) - Where the UDAF userdata resides // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance //------------------------------------------------------------------------------ -void 
RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) +void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, + int64_t colAux, uint64_t& funcColsIdx) { static_any::any valOut; // Get the user data - boost::shared_ptr userData = rowIn.getUserData(colIn + 1); + boost::shared_ptr userDataIn = rowIn.getUserData(colIn+1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. uint32_t flags[1]; flags[0] = 0; - if (!userData) + if (!userDataIn) { if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { @@ -4230,11 +4309,12 @@ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, // Call the UDAF subEvaluate method mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userData.get()); + rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userDataIn.get()); fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColsIdx].get()); rowUDAF->bInterrupted = true; throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } @@ -4429,17 +4509,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); - } - else - { - throw logic_error("(7)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } From ea70806e93f19772edd65bc0a6a2164fec5e98ea Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 25 May 2018 12:56:29 -0500 Subject: [PATCH 16/19] MCOL-1201 Add support for UDAF multiple parm constants --- dbcon/execplan/constantcolumn.h | 2 + dbcon/joblist/jlf_common.cpp | 2 +- 
dbcon/joblist/joblistfactory.cpp | 10 +- dbcon/joblist/windowfunctionstep.cpp | 9 +- .../primproc/batchprimitiveprocessor.cpp | 16 +- utils/common/any.hpp | 7 +- utils/loggingcpp/errorcodes.cpp | 2 +- utils/messageqcpp/bytestream.h | 1 + utils/rowgroup/rowaggregation.h | 36 ++- utils/udfsdk/allnull.h | 1 - utils/udfsdk/avg_mode.h | 1 - utils/udfsdk/avgx.h | 1 - utils/udfsdk/mcsv1_udaf.h | 1 - utils/udfsdk/median.h | 1 - utils/udfsdk/regr_avgx.cpp | 6 +- utils/udfsdk/regr_avgx.h | 1 - utils/udfsdk/ssq.h | 1 - utils/udfsdk/udfsdk.vpj | 33 --- utils/windowfunction/wf_udaf.cpp | 276 +++++++++++------- utils/windowfunction/wf_udaf.h | 2 - utils/windowfunction/windowfunctiontype.cpp | 24 +- utils/windowfunction/windowfunctiontype.h | 7 +- 22 files changed, 265 insertions(+), 175 deletions(-) diff --git a/dbcon/execplan/constantcolumn.h b/dbcon/execplan/constantcolumn.h index 04098faae..be0731044 100644 --- a/dbcon/execplan/constantcolumn.h +++ b/dbcon/execplan/constantcolumn.h @@ -38,6 +38,8 @@ class ByteStream; */ namespace execplan { +class ConstantColumn; + /** * @brief A class to represent a constant return column * diff --git a/dbcon/joblist/jlf_common.cpp b/dbcon/joblist/jlf_common.cpp index f5dbeee17..4b1980d49 100644 --- a/dbcon/joblist/jlf_common.cpp +++ b/dbcon/joblist/jlf_common.cpp @@ -405,7 +405,7 @@ uint32_t getTupleKey(JobInfo& jobInfo, const SRCP& srcp, bool add) if (add) { - // setTupleInfo first if add is ture, ok if already set. + // setTupleInfo first if add is true, ok if already set. 
const SimpleColumn* sc = dynamic_cast(srcp.get()); if (sc != NULL) diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 4cf7bccc5..033bf2643 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -300,6 +300,7 @@ const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo) { const ArithmeticColumn* ac = NULL; const FunctionColumn* fc = NULL; + const ConstantColumn* cc = NULL; uint64_t eid = -1; CalpontSystemCatalog::ColType ct; ExpressionStep* es = new ExpressionStep(jobInfo); @@ -316,6 +317,11 @@ const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo) eid = fc->expressionId(); ct = fc->resultType(); } + else if ((cc = dynamic_cast(retCols[i].get())) != NULL) + { + eid = cc->expressionId(); + ct = cc->resultType(); + } else { std::ostringstream errmsg; @@ -1004,7 +1010,9 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo for (uint32_t parm = 0; parm < aggParms.size(); ++parm) { - if (aggc->constCol().get() != NULL) + // Only do the optimization of converting to count(*) if + // there is only one parameter. 
+ if (aggParms.size() == 1 && aggc->constCol().get() != NULL) { // replace the aggregate on constant with a count(*) SRCP clone; diff --git a/dbcon/joblist/windowfunctionstep.cpp b/dbcon/joblist/windowfunctionstep.cpp index 4d24f0b4b..2a93f680b 100644 --- a/dbcon/joblist/windowfunctionstep.cpp +++ b/dbcon/joblist/windowfunctionstep.cpp @@ -569,6 +569,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) for (RetColsVector::iterator i = jobInfo.windowCols.begin(); i < jobInfo.windowCols.end(); i++) { + bool isUDAF = false; // window function type WindowFunctionColumn* wc = dynamic_cast(i->get()); uint64_t ridx = getColumnIndex(*i, colIndexMap, jobInfo); // result index @@ -590,6 +591,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // if (boost::iequals(wc->functionName(),"UDAF_FUNC") if (wc->functionName() == "UDAF_FUNC") { + isUDAF = true; ++wfsUserFunctionCount; } @@ -646,10 +648,13 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // column type for functor templates int ct = 0; + if (isUDAF) + { + ct = wc->getUDAFContext().getResultType(); + } // make sure index is in range - if (fields.size() > 1 && fields[1] >= 0 && static_cast(fields[1]) < types.size()) + else if (fields.size() > 1 && fields[1] >= 0 && static_cast(fields[1]) < types.size()) ct = types[fields[1]]; - // workaround for functions using "within group (order by)" syntax string fn = boost::to_upper_copy(wc->functionName()); diff --git a/primitives/primproc/batchprimitiveprocessor.cpp b/primitives/primproc/batchprimitiveprocessor.cpp index bc56a7430..019761d39 100644 --- a/primitives/primproc/batchprimitiveprocessor.cpp +++ b/primitives/primproc/batchprimitiveprocessor.cpp @@ -1677,15 +1677,11 @@ void BatchPrimitiveProcessor::execute() } catch (logging::QueryDataExcept& qex) { - ostringstream os; - os << qex.what() << endl; - writeErrorMsg(os.str(), qex.errorCode()); + writeErrorMsg(qex.what(), qex.errorCode()); } 
catch (logging::DictionaryBufferOverflow& db) { - ostringstream os; - os << db.what() << endl; - writeErrorMsg(os.str(), db.errorCode()); + writeErrorMsg(db.what(), db.errorCode()); } catch (scalar_exception& se) { @@ -1758,15 +1754,11 @@ void BatchPrimitiveProcessor::execute() } catch (IDBExcept& iex) { - ostringstream os; - os << iex.what() << endl; - writeErrorMsg(os.str(), iex.errorCode(), true, false); + writeErrorMsg(iex.what(), iex.errorCode(), true, false); } catch (const std::exception& ex) { - ostringstream os; - os << ex.what() << endl; - writeErrorMsg(os.str(), logging::batchPrimitiveProcessorErr); + writeErrorMsg(ex.what(), logging::batchPrimitiveProcessorErr); } catch (...) { diff --git a/utils/common/any.hpp b/utils/common/any.hpp index 5408c5c87..63d05d3d2 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -11,15 +11,12 @@ #include #include +#include namespace static_any { namespace anyimpl { - struct bad_any_cast - { - }; - struct empty_any { }; @@ -266,7 +263,7 @@ public: T& cast() { if (policy != anyimpl::get_policy()) - throw anyimpl::bad_any_cast(); + throw std::runtime_error("static_any: type mismatch in cast"); T* r = reinterpret_cast(policy->get_value(&object)); return *r; } diff --git a/utils/loggingcpp/errorcodes.cpp b/utils/loggingcpp/errorcodes.cpp index 60919c906..4b4196800 100644 --- a/utils/loggingcpp/errorcodes.cpp +++ b/utils/loggingcpp/errorcodes.cpp @@ -29,7 +29,7 @@ using namespace std; namespace logging { -ErrorCodes::ErrorCodes(): fErrorCodes(), fPreamble("An unexpected condition within the query caused an internal processing error within InfiniDB. Please check the log files for more details. Additional Information: ") +ErrorCodes::ErrorCodes(): fErrorCodes(), fPreamble("An unexpected condition within the query caused an internal processing error within Columnstore. Please check the log files for more details. 
Additional Information: ") { fErrorCodes[batchPrimitiveStepErr] = "error in BatchPrimitiveStep."; fErrorCodes[tupleBPSErr] = "error in TupleBPS."; diff --git a/utils/messageqcpp/bytestream.h b/utils/messageqcpp/bytestream.h index d1a3f4988..f8453843e 100644 --- a/utils/messageqcpp/bytestream.h +++ b/utils/messageqcpp/bytestream.h @@ -35,6 +35,7 @@ #include "exceptclasses.h" #include "serializeable.h" +#include "any.hpp" class ByteStreamTestSuite; diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index 282f354fc..14e4313cf 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -50,6 +50,7 @@ #include "stlpoolallocator.h" #include "returnedcolumn.h" #include "mcsv1_udaf.h" +#include "constantcolumn.h" // To do: move code that depends on joblist to a proper subsystem. namespace joblist @@ -200,6 +201,13 @@ struct RowAggFunctionCol // 4. for duplicate - point to the real aggretate column to be copied from // Set only on UM, the fAuxColumnIndex is defaulted to fOutputColumnIndex+1 on PM. uint32_t fAuxColumnIndex; + + // For UDAF that have more than one parameter and some parameters are constant. + // There will be a series of RowAggFunctionCol created, one for each parameter. + // The first will be a RowUDAFFunctionCol. Subsequent ones will be RowAggFunctionCol + // with fAggFunction == ROWAGG_MULTI_PARM. Order is important. + // If this parameter is constant, that value is here. 
+ SRCP fpConstCol; }; @@ -220,8 +228,11 @@ struct RowUDAFFunctionCol : public RowAggFunctionCol inputColIndex, outputColIndex, auxColIndex), bInterrupted(false) {} - RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, - rhs.fInputColumnIndex, rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), fUDAFContext(rhs.fUDAFContext) + RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : + RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, rhs.fInputColumnIndex, + rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), + fUDAFContext(rhs.fUDAFContext), + bInterrupted(false) {} virtual ~RowUDAFFunctionCol() {} @@ -238,6 +249,16 @@ inline void RowAggFunctionCol::serialize(messageqcpp::ByteStream& bs) const bs << (uint8_t)fAggFunction; bs << fInputColumnIndex; bs << fOutputColumnIndex; + if (fpConstCol) + { + bs << (uint8_t)1; + fpConstCol.get()->serialize(bs); + } + else + { + bs << (uint8_t)0; + } + } inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) @@ -245,6 +266,13 @@ inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) bs >> (uint8_t&)fAggFunction; bs >> fInputColumnIndex; bs >> fOutputColumnIndex; + uint8_t t; + bs >> t; + if (t) + { + fpConstCol.reset(new ConstantColumn); + fpConstCol.get()->unserialize(bs); + } } inline void RowUDAFFunctionCol::serialize(messageqcpp::ByteStream& bs) const @@ -586,7 +614,7 @@ protected: virtual void doAvg(const Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); - virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); @@ -902,7 +930,7 @@ protected: void doStatistics(const Row&, int64_t, int64_t, 
int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); - void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); + void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx); bool countSpecial(const RowGroup* pRG) { return false; diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h index da17f5d6b..6a727caf6 100644 --- a/utils/udfsdk/allnull.h +++ b/utils/udfsdk/allnull.h @@ -48,7 +48,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/avg_mode.h b/utils/udfsdk/avg_mode.h index 5722c5fea..fba1fcdcc 100644 --- a/utils/udfsdk/avg_mode.h +++ b/utils/udfsdk/avg_mode.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/avgx.h b/utils/udfsdk/avgx.h index 0569b6091..a830c6803 100644 --- a/utils/udfsdk/avgx.h +++ b/utils/udfsdk/avgx.h @@ -35,7 +35,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h index df3f47649..e09228d77 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -68,7 +68,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h index 142be6ba8..48bd93c70 100644 --- a/utils/udfsdk/median.h +++ b/utils/udfsdk/median.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/udfsdk/regr_avgx.cpp index c7cc5b56e..aec4f361f 100644 --- a/utils/udfsdk/regr_avgx.cpp +++ b/utils/udfsdk/regr_avgx.cpp @@ -82,7 +82,7 @@ mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* { return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
} - if (valIn_x.empty() || valIn_y.empty()) + if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. Probably redundant { return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. } @@ -107,10 +107,6 @@ mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* { val = valIn_x.cast(); } - else if (valIn_x.compatible(longTypeId)) - { - val = valIn_x.cast(); - } else if (valIn_x.compatible(llTypeId)) { val = valIn_x.cast(); diff --git a/utils/udfsdk/regr_avgx.h b/utils/udfsdk/regr_avgx.h index f70f30d8c..27b8708f7 100644 --- a/utils/udfsdk/regr_avgx.h +++ b/utils/udfsdk/regr_avgx.h @@ -35,7 +35,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h index 2cac61c2c..e27ecf1fa 100644 --- a/utils/udfsdk/ssq.h +++ b/utils/udfsdk/ssq.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index 3d3ac39ca..fe1f3fd0e 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -238,38 +238,5 @@ N="Makefile" Type="Makefile"/> - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index 5cd5243c5..2876fbf7e 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -451,7 +451,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { mcsv1sdk::mcsv1_UDAF::ReturnCode rc; uint64_t colOut = fFieldIndex[0]; - + bool isNull = false; if ((fFrameUnit == WF__FRAME_ROWS) || (fPrev == -1) || (!fPeer->operator()(getPointer(fRowData->at(c)), getPointer(fRowData->at(fPrev))))) @@ -468,13 +468,24 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Put the parameter metadata (type, scale, precision) into valsIn mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + ConstantColumn* cc = NULL; for (uint32_t i = 0; i < 
getContext().getParameterCount(); ++i) { - uint64_t colIn = fFieldIndex[i+1]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colIn); + cc = static_cast(fConstantParms[i].get()); + if (cc) + { + datum.dataType = cc->resultType().colDataType; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + uint64_t colIn = fFieldIndex[i+1]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } } if (b <= c && c <= e) @@ -494,12 +505,14 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) uint32_t flags[getContext().getParameterCount()]; for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { + cc = static_cast(fConstantParms[k].get()); uint64_t colIn = fFieldIndex[k+1]; mcsv1sdk::ColumnDatum& datum = valsIn[k]; // Turn on Null flags or skip based on respect nulls flags[k] = 0; - if (fRow.isNullValue(colIn) == true) + if ((!cc && fRow.isNullValue(colIn) == true) + || (cc && cc->type() == ConstantColumn::NULLDATA)) { if (!bRespectNulls) { @@ -510,133 +523,196 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - // MCOL-1201 Multi-Paramter calls - switch (datum.dataType) + if (!bHasNull && !(flags[k] & mcsv1sdk::PARAM_IS_NULL)) { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: + switch (datum.dataType) { - int64_t valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. 
- if (k == 0) + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + int64_t valIn; + if (cc) { - continue; + valIn = cc->getIntVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - case CalpontSystemCatalog::UDECIMAL: - { - uint64_t valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + int64_t valIn; + if (cc) { - continue; + valIn = cc->getDecimalVal(fRow, isNull).value; } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - { - double valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. 
- // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + uint64_t valIn; + if (cc) { - continue; + valIn = cc->getUintVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - { - float valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + double valIn; + if (cc) { - continue; + valIn = cc->getDoubleVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::VARBINARY: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::BLOB: - { - string valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + float valIn; + if (cc) { - continue; + valIn = cc->getFloatVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - default: - { - string errStr = "(" + colType2String[i] + ")"; - errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); - cerr << errStr << endl; - throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + if (cc) + { + valIn = cc->getStrVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - break; + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[i] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } } } // Skip if any value is NULL and respect nulls is off. diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index f7a4c4b08..fc3f9006d 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -53,8 +53,6 @@ public: // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF -// The template parameter is currently only used to support DISTINCT, as -// as that is done via a set template class WF_udaf : public WindowFunctionType { diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index 4c5b4de32..f5598a7e5 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -39,7 +39,6 @@ using namespace logging; using namespace ordering; #include "calpontsystemcatalog.h" -#include "constantcolumn.h" #include "dataconvert.h" // int64_t IDB_pow[19] using namespace execplan; @@ -228,6 +227,9 @@ WindowFunctionType::makeWindowFunction(const string& name, int ct, WindowFunctio break; } + // Copy the only the constant parameter pointers + af->constParms(wc->functionParms()); + return af; } @@ -634,6 +636,26 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) return v; } +void WindowFunctionType::constParms(const std::vector& functionParms) +{ + // fConstantParms will end up with a copy of functionParms, but only + // the constant types will be copied. 
Other types will take up space but + // be NULL. This allows us to acces the constants without the overhead + // of dynamic_cast for every row. + for (size_t i = 0; i < functionParms.size(); ++i) + { + ConstantColumn* cc = dynamic_cast(functionParms[i].get()); + if (cc) + { + fConstantParms.push_back(functionParms[i]); + } + else + { + fConstantParms.push_back(SRCP(cc)); + } + } +} + } //namespace // vim:ts=4 sw=4: diff --git a/utils/windowfunction/windowfunctiontype.h b/utils/windowfunction/windowfunctiontype.h index 50732d3b5..efa1c548a 100644 --- a/utils/windowfunction/windowfunctiontype.h +++ b/utils/windowfunction/windowfunctiontype.h @@ -31,7 +31,7 @@ #include "returnedcolumn.h" #include "rowgroup.h" #include "windowframe.h" - +#include "constantcolumn.h" namespace ordering { @@ -198,6 +198,8 @@ public: fStep = step; } + void constParms(const std::vector& functionParms); + static boost::shared_ptr makeWindowFunction(const std::string&, int ct, WindowFunctionColumn* wc); protected: @@ -244,6 +246,9 @@ protected: // output and input field indices: [0] - output std::vector fFieldIndex; + // constant function parameters -- needed for udaf with constant + std::vector fConstantParms; + // row meta data rowgroup::RowGroup fRowGroup; rowgroup::Row fRow; From 5b3db71c8fd7a512abf22dae32991afcee5a1979 Mon Sep 17 00:00:00 2001 From: David Hall Date: Wed, 6 Jun 2018 10:34:35 -0500 Subject: [PATCH 17/19] MCOL-1201 fix some regressions --- dbcon/joblist/tupleaggregatestep.cpp | 52 +++++++++++++++++++++------- dbcon/mysql/ha_calpont_execplan.cpp | 26 +++++++++----- 2 files changed, 57 insertions(+), 21 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index be0e2009d..0f981e68f 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -1505,10 +1505,17 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); // If the 
param is const - ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) + if (udafc) { - funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep1PhaseAggregate: UDAF multi function with no parms", aggregateFuncErr); } ++udafcParamIdx; } @@ -2123,10 +2130,17 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( multiParmIndexes.push_back(colAgg); ++colAgg; // If the param is const - ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) + if (udafc) { - funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep1PhaseDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); } ++udafcParamIdx; } @@ -3398,10 +3412,17 @@ void TupleAggregateStep::prep2PhasesAggregate( widthAggPm.push_back(width[colProj]); colAggPm++; // If the param is const - ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) + if (udafc) { - funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep2PhasesAggregate: UDAF multi function with no parms", aggregateFuncErr); } ++udafcParamIdx; } @@ -4257,10 +4278,17 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( multiParmIndexes.push_back(colAggPm); colAggPm++; // If the param is const - ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) + if (udafc) { - funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + ConstantColumn* cc = 
dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep2PhasesDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); } ++udafcParamIdx; } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index d030d1855..8df06c6b4 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4165,7 +4165,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) rowCol->columnVec(selCols); (dynamic_cast(ac))->orderCols(orderCols); parm.reset(rowCol); - ac->aggParms().push_back(parm); + ac->aggParms().push_back(parm); if (gc->str_separator()) { @@ -4311,15 +4311,15 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); } - if (ac) - delete ac; + if (ac) + delete ac; return NULL; } - if (parm) - { - // MCOL-1201 multi-argument aggregate - ac->aggParms().push_back(parm); - } + if (parm) + { + // MCOL-1201 multi-argument aggregate + ac->aggParms().push_back(parm); + } } } @@ -10033,7 +10033,15 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->aggParms().push_back(minSc); + // Replace the last (presumably constant) object with minSc + if ((*coliter)->aggParms().empty()) + { + (*coliter)->aggParms().push_back(minSc); + } + else + { + (*coliter)->aggParms()[0] = minSc; + } } std::vector::iterator funciter; From 2b77f0f284135f9dad93acb0ef609fec2d1ec34f Mon Sep 17 00:00:00 2001 From: David Hall Date: Wed, 6 Jun 2018 16:51:47 -0500 Subject: [PATCH 18/19] MCOL-1201 Handle NULLs in Window functions correctly. 
--- utils/windowfunction/wf_udaf.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index 2876fbf7e..ee48360f1 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -503,6 +503,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // NULL flags uint32_t flags[getContext().getParameterCount()]; + bHasNull = false; for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { cc = static_cast(fConstantParms[k].get()); @@ -715,11 +716,11 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) } } } - // Skip if any value is NULL and respect nulls is off. - if (bHasNull) - { - continue; - } + } + // Skip if any value is NULL and respect nulls is off. + if (bHasNull) + { + continue; } getContext().setDataFlags(flags); From 58f10055aa2826ec072beeecfda9567d8da31fd2 Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 7 Jun 2018 10:05:35 -0500 Subject: [PATCH 19/19] MCOL-1201 Add regr_avgx to included functions --- dbcon/mysql/install_calpont_mysql.sh | 1 + utils/winport/win_setup_mysql_part4.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/dbcon/mysql/install_calpont_mysql.sh b/dbcon/mysql/install_calpont_mysql.sh index e8eb5b2b0..e04371549 100755 --- a/dbcon/mysql/install_calpont_mysql.sh +++ b/dbcon/mysql/install_calpont_mysql.sh @@ -84,6 +84,7 @@ CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.so'; CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemready RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemreadonly RETURNS INTEGER soname 'libcalmysql.so'; +CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libcalmysql.dll'; CREATE DATABASE IF NOT EXISTS infinidb_vtable; CREATE DATABASE IF NOT EXISTS infinidb_querystats; diff --git a/utils/winport/win_setup_mysql_part4.sql b/utils/winport/win_setup_mysql_part4.sql index 
3b75fbe98..d884214ec 100644 --- a/utils/winport/win_setup_mysql_part4.sql +++ b/utils/winport/win_setup_mysql_part4.sql @@ -18,4 +18,5 @@ CREATE FUNCTION idbextentmin RETURNS STRING soname 'libcalmysql.dll'; CREATE FUNCTION idbextentmax RETURNS STRING soname 'libcalmysql.dll'; CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.dll'; CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.dll'; +CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libcalmysql.dll';