diff --git a/dbcon/execplan/aggregatecolumn.cpp b/dbcon/execplan/aggregatecolumn.cpp index 18cba2607..5bce12d79 100644 --- a/dbcon/execplan/aggregatecolumn.cpp +++ b/dbcon/execplan/aggregatecolumn.cpp @@ -98,36 +98,6 @@ AggregateColumn::AggregateColumn(const uint32_t sessionID): { } -AggregateColumn::AggregateColumn(const AggOp aggOp, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - -AggregateColumn::AggregateColumn(const AggOp aggOp, const string& content, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + content + ")") -{ - // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); -} - -// deprecated constructor. use function name as string -AggregateColumn::AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fFunctionName(functionName), - fAggOp(NOOP), - fAsc(false), - fData(functionName + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - // deprecated constructor. 
use function name as string AggregateColumn::AggregateColumn(const string& functionName, const string& content, const uint32_t sessionID): ReturnedColumn(sessionID), @@ -137,20 +107,21 @@ AggregateColumn::AggregateColumn(const string& functionName, const string& conte fData(functionName + "(" + content + ")") { // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); + SRCP srcp(new ArithmeticColumn(content)); + fAggParms.push_back(srcp); } AggregateColumn::AggregateColumn( const AggregateColumn& rhs, const uint32_t sessionID ): ReturnedColumn(rhs, sessionID), fFunctionName (rhs.fFunctionName), fAggOp(rhs.fAggOp), - fFunctionParms(rhs.fFunctionParms), fTableAlias(rhs.tableAlias()), fAsc(rhs.asc()), fData(rhs.data()), fConstCol(rhs.fConstCol) { fAlias = rhs.alias(); + fAggParms = rhs.fAggParms; } /** @@ -166,10 +137,14 @@ const string AggregateColumn::toString() const if (fAlias.length() > 0) output << "/Alias: " << fAlias << endl; - if (fFunctionParms == 0) - output << "No arguments" << endl; + if (fAggParms.size() == 0) + output << "No arguments"; else - output << *fFunctionParms << endl; + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + output << *(fAggParms[i]) << " "; + } + output << endl; if (fConstCol) output << *fConstCol; @@ -191,10 +166,11 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const b << fFunctionName; b << static_cast(fAggOp); - if (fFunctionParms == 0) - b << (uint8_t) ObjectReader::NULL_CLASS; - else - fFunctionParms->serialize(b); + b << static_cast(fAggParms.size()); + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + fAggParms[i]->serialize(b); + } b << static_cast(fGroupByColList.size()); @@ -219,20 +195,26 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const void AggregateColumn::unserialize(messageqcpp::ByteStream& b) { - ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); - fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); - 
fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); - ReturnedColumn::unserialize(b); - b >> fFunctionName; - b >> fAggOp; - //delete fFunctionParms; - fFunctionParms.reset( - dynamic_cast(ObjectReader::createTreeNode(b))); - messageqcpp::ByteStream::quadbyte size; messageqcpp::ByteStream::quadbyte i; ReturnedColumn* rc; + ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); + fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); + fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); + fAggParms.erase(fAggParms.begin(), fAggParms.end()); + ReturnedColumn::unserialize(b); + b >> fFunctionName; + b >> fAggOp; + + b >> size; + for (i = 0; i < size; i++) + { + rc = dynamic_cast(ObjectReader::createTreeNode(b)); + SRCP srcp(rc); + fAggParms.push_back(srcp); + } + b >> size; for (i = 0; i < size; i++) @@ -261,6 +243,7 @@ void AggregateColumn::unserialize(messageqcpp::ByteStream& b) bool AggregateColumn::operator==(const AggregateColumn& t) const { const ReturnedColumn* rc1, *rc2; + AggParms::const_iterator it, it2; rc1 = static_cast(this); rc2 = static_cast(&t); @@ -277,16 +260,18 @@ bool AggregateColumn::operator==(const AggregateColumn& t) const if (fAggOp != t.fAggOp) return false; - if (fFunctionParms.get() != NULL && t.fFunctionParms.get() != NULL) + if (aggParms().size() != t.aggParms().size()) { - if (*fFunctionParms.get() != t.fFunctionParms.get()) + return false; + } + for (it = fAggParms.begin(), it2 = t.fAggParms.begin(); + it != fAggParms.end(); + ++it, ++it2) + { + if (**it != **it2) return false; } - else if (fFunctionParms.get() != NULL || t.fFunctionParms.get() != NULL) - return false; - //if (fAlias != t.fAlias) - // return false; if (fTableAlias != t.fTableAlias) return false; @@ -645,3 +630,4 @@ AggregateColumn::AggOp AggregateColumn::agname2num(const string& agname) } } // namespace execplan + diff --git a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h index 
d1db7e5a4..b0884f179 100644 --- a/dbcon/execplan/aggregatecolumn.h +++ b/dbcon/execplan/aggregatecolumn.h @@ -40,6 +40,8 @@ class ByteStream; namespace execplan { +typedef std::vector AggParms; + /** * @brief A class to represent a aggregate return column * @@ -74,7 +76,8 @@ public: BIT_OR, BIT_XOR, GROUP_CONCAT, - UDAF + UDAF, + MULTI_PARM }; /** @@ -94,21 +97,6 @@ public: */ AggregateColumn(const uint32_t sessionID); - /** - * ctor - */ - AggregateColumn(const AggOp aggop, ReturnedColumn* parm, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const AggOp aggop, const std::string& content, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID = 0); - /** * ctor */ @@ -155,24 +143,27 @@ public: fAggOp = aggOp; } + /** get function parms - * - * set the function parms from this object */ - virtual const SRCP functionParms() const + virtual AggParms& aggParms() { - return fFunctionParms; + return fAggParms; + } + + virtual const AggParms& aggParms() const + { + return fAggParms; } /** set function parms - * - * set the function parms for this object */ - virtual void functionParms(const SRCP& functionParms) + virtual void aggParms(const AggParms& parms) { - fFunctionParms = functionParms; + fAggParms = parms; } + /** return a copy of this pointer * * deep copy of this pointer and return the copy @@ -325,9 +316,10 @@ protected: uint8_t fAggOp; /** - * A ReturnedColumn objects that are the arguments to this function + * ReturnedColumn objects that are the arguments to this + * function */ - SRCP fFunctionParms; + AggParms fAggParms; /** table alias * A string to represent table alias name which contains this column diff --git a/dbcon/execplan/constantcolumn.h b/dbcon/execplan/constantcolumn.h index 04098faae..be0731044 100644 --- a/dbcon/execplan/constantcolumn.h +++ b/dbcon/execplan/constantcolumn.h @@ -38,6 +38,8 @@ class ByteStream; */ namespace 
execplan { +class ConstantColumn; + /** * @brief A class to represent a constant return column * diff --git a/dbcon/joblist/expressionstep.cpp b/dbcon/joblist/expressionstep.cpp index 0e064c359..4a8a14ff3 100644 --- a/dbcon/joblist/expressionstep.cpp +++ b/dbcon/joblist/expressionstep.cpp @@ -56,6 +56,17 @@ using namespace rowgroup; namespace joblist { +ExpressionStep::ExpressionStep() : + fExpressionFilter(NULL), + fExpressionId(-1), + fVarBinOK(false), + fSelectFilter(false), + fAssociatedJoinId(0), + fDoJoin(false), + fVirtual(false) +{ +} + ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : JobStep(jobInfo), fExpressionFilter(NULL), @@ -68,7 +79,6 @@ ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : { } - ExpressionStep::ExpressionStep(const ExpressionStep& rhs) : JobStep(rhs), fExpression(rhs.expression()), diff --git a/dbcon/joblist/expressionstep.h b/dbcon/joblist/expressionstep.h index 4a069440f..63423fc7d 100644 --- a/dbcon/joblist/expressionstep.h +++ b/dbcon/joblist/expressionstep.h @@ -50,6 +50,7 @@ class ExpressionStep : public JobStep { public: // constructors + ExpressionStep(); ExpressionStep(const JobInfo&); // destructor constructors virtual ~ExpressionStep(); diff --git a/dbcon/joblist/groupconcat.cpp b/dbcon/joblist/groupconcat.cpp index 234fc0a8e..afc91a2ec 100644 --- a/dbcon/joblist/groupconcat.cpp +++ b/dbcon/joblist/groupconcat.cpp @@ -78,7 +78,7 @@ void GroupConcatInfo::prepGroupConcat(JobInfo& jobInfo) while (i != jobInfo.groupConcatCols.end()) { GroupConcatColumn* gcc = dynamic_cast(i->get()); - const RowColumn* rcp = dynamic_cast(gcc->functionParms().get()); + const RowColumn* rcp = dynamic_cast(gcc->aggParms()[0].get()); SP_GroupConcat groupConcat(new GroupConcat); groupConcat->fSeparator = gcc->separator(); diff --git a/dbcon/joblist/jlf_common.cpp b/dbcon/joblist/jlf_common.cpp index f5dbeee17..4b1980d49 100644 --- a/dbcon/joblist/jlf_common.cpp +++ b/dbcon/joblist/jlf_common.cpp @@ -405,7 +405,7 @@ uint32_t 
getTupleKey(JobInfo& jobInfo, const SRCP& srcp, bool add) if (add) { - // setTupleInfo first if add is ture, ok if already set. + // setTupleInfo first if add is true, ok if already set. const SimpleColumn* sc = dynamic_cast(srcp.get()); if (sc != NULL) diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index a48ecd13a..033bf2643 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -18,7 +18,6 @@ // $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $ - #include #include #include @@ -301,6 +300,7 @@ const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo) { const ArithmeticColumn* ac = NULL; const FunctionColumn* fc = NULL; + const ConstantColumn* cc = NULL; uint64_t eid = -1; CalpontSystemCatalog::ColType ct; ExpressionStep* es = new ExpressionStep(jobInfo); @@ -317,6 +317,11 @@ const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo) eid = fc->expressionId(); ct = fc->resultType(); } + else if ((cc = dynamic_cast(retCols[i].get())) != NULL) + { + eid = cc->expressionId(); + ct = cc->resultType(); + } else { std::ostringstream errmsg; @@ -870,7 +875,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo if (gcc != NULL) { - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rcp = dynamic_cast(srcp.get()); const vector& cols = rcp->columnVec(); @@ -891,21 +896,55 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } +#if 0 + // MCOL-1201 Add support for multi-parameter UDAnF + UDAFColumn* udafc = dynamic_cast(retCols[i].get()); + if (udafc != NULL) + { + srcp = udafc->aggParms()[0]; + const RowColumn* rcp = dynamic_cast(srcp.get()); + const vector& cols = rcp->columnVec(); + for (vector::const_iterator j = cols.begin(); j != cols.end(); j++) + { + srcp = *j; + if (dynamic_cast(srcp.get()) == NULL) + retCols.push_back(srcp); + + // Do we need this? 
+ const ArithmeticColumn* ac = dynamic_cast(srcp.get()); + const FunctionColumn* fc = dynamic_cast(srcp.get()); + if (ac != NULL || fc != NULL) + { + // bug 3728, make a dummy expression step for each expression. + scoped_ptr es(new ExpressionStep(jobInfo)); + es->expression(srcp, jobInfo); + } + } + continue; + } +#endif srcp = retCols[i]; const AggregateColumn* ag = dynamic_cast(retCols[i].get()); - - if (ag != NULL) - srcp = ag->functionParms(); - - const ArithmeticColumn* ac = dynamic_cast(srcp.get()); - const FunctionColumn* fc = dynamic_cast(srcp.get()); - - if (ac != NULL || fc != NULL) + // bug 3728 Make a dummy expression for srcp if it is an + // expression. This is needed to fill in some stuff. + // Note that es.expression does nothing if the item is not an expression. + if (ag == NULL) { - // bug 3728, make a dummy expression step for each expression. - scoped_ptr es(new ExpressionStep(jobInfo)); - es->expression(srcp, jobInfo); + // Not an aggregate. Make a dummy expression for the item + ExpressionStep es; + es.expression(srcp, jobInfo); + } + else + { + // MCOL-1201 multi-argument aggregate. make a dummy expression + // step for each argument that is an expression. 
+ for (uint32_t i = 0; i < ag->aggParms().size(); ++i) + { + srcp = ag->aggParms()[i]; + ExpressionStep es; + es.expression(srcp, jobInfo); + } } } @@ -915,17 +954,18 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo { srcp = retCols[i]; const SimpleColumn* sc = dynamic_cast(srcp.get()); + AggregateColumn* aggc = dynamic_cast(srcp.get()); bool doDistinct = (csep->distinct() && csep->groupByCols().empty()); uint32_t tupleKey = -1; string alias; string view; - // returned column could be groupby column, a simplecoulumn not a agregatecolumn + // returned column could be groupby column, a simplecoulumn not an aggregatecolumn int op = 0; CalpontSystemCatalog::OID dictOid = 0; CalpontSystemCatalog::ColType ct, aggCt; - if (sc == NULL) + if (aggc) { GroupConcatColumn* gcc = dynamic_cast(retCols[i].get()); @@ -939,7 +979,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo tupleKey = ti.key; jobInfo.returnedColVec.push_back(make_pair(tupleKey, gcc->aggOp())); // not a tokenOnly column. Mark all the columns involved - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rowCol = dynamic_cast(srcp.get()); if (rowCol) @@ -963,186 +1003,355 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } - - AggregateColumn* ac = dynamic_cast(retCols[i].get()); - - if (ac != NULL) + else { - srcp = ac->functionParms(); - sc = dynamic_cast(srcp.get()); + // Aggregate column not group concat + AggParms& aggParms = aggc->aggParms(); - if (ac->constCol().get() != NULL) + for (uint32_t parm = 0; parm < aggParms.size(); ++parm) { - // replace the aggregate on constant with a count(*) - SRCP clone; - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) + // Only do the optimization of converting to count(*) if + // there is only one parameter. 
+ if (aggParms.size() == 1 && aggc->constCol().get() != NULL) { - clone.reset(new UDAFColumn(*udafc, ac->sessionID())); + // replace the aggregate on constant with a count(*) + SRCP clone; + UDAFColumn* udafc = dynamic_cast(aggc); + + if (udafc) + { + clone.reset(new UDAFColumn(*udafc, aggc->sessionID())); + } + else + { + clone.reset(new AggregateColumn(*aggc, aggc->sessionID())); + } + + jobInfo.constAggregate.insert(make_pair(i, clone)); + aggc->aggOp(AggregateColumn::COUNT_ASTERISK); + aggc->distinct(false); + } + + srcp = aggParms[parm]; + sc = dynamic_cast(srcp.get()); + if (parm == 0) + { + op = aggc->aggOp(); } else { - clone.reset(new AggregateColumn(*ac, ac->sessionID())); + op = AggregateColumn::MULTI_PARM; + } + doDistinct = aggc->distinct(); + if (aggParms.size() == 1) + { + // Set the col type based on the single parm. + // Changing col type based on a parm if multiple parms + // doesn't really make sense. + updateAggregateColType(aggc, srcp, op, jobInfo); + } + aggCt = aggc->resultType(); + + // As of bug3695, make sure varbinary is not used in aggregation. + // TODO: allow for UDAF + if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) + throw runtime_error ("VARBINARY in aggregate function is not supported."); + + // Project the parm columns or expressions + if (sc != NULL) + { + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. 
+ if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. + if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. + else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } + } + else + { + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << 
typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - jobInfo.constAggregate.insert(make_pair(i, clone)); - ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ac->distinct(false); - } + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - op = ac->aggOp(); - doDistinct = ac->distinct(); - updateAggregateColType(ac, srcp, op, jobInfo); - aggCt = ac->resultType(); + if (keyIt == projectKeys.end()) + { + RetColsVector::iterator it = pcv.end(); - // As of bug3695, make sure varbinary is not used in aggregation. - if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) - throw runtime_error ("VARBINARY in aggregate function is not supported."); - } - } + if (doDistinct) + it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // simple column selected or aggregated - if (sc != NULL) - { - // one column only need project once - CalpontSystemCatalog::OID retOid = sc->oid(); - CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); - alias = extractTableAlias(sc); - view = sc->viewName(); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); - if (!sc->schemaName().empty()) - { - ct = sc->colType(); + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } -//XXX use this before connector sets colType in sc correctly. 
- if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) - ct = jobInfo.csc->colType(sc->oid()); + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; -//X - dictOid = isDictCol(ct); - } - else - { - retOid = (tblOid + 1) + sc->colPosition(); - ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; - } + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); - TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); - tupleKey = ti.key; + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; - // this is a string column - if (dictOid > 0) - { - map::iterator findit = jobInfo.tokenOnly.find(tupleKey); - - // if the column has never seen, and the op is count: possible need count only. - if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) - { - if (findit == jobInfo.tokenOnly.end()) - jobInfo.tokenOnly[tupleKey] = true; - } - // if aggregate other than count, token is not enough. 
- else if (op != 0 || doDistinct) - { - jobInfo.tokenOnly[tupleKey] = false; - } - - findit = jobInfo.tokenOnly.find(tupleKey); - - if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) - { - dictMap[tupleKey] = dictOid; - jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; - ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); - jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } } else { - const ArithmeticColumn* ac = NULL; - const FunctionColumn* fc = NULL; - const WindowFunctionColumn* wc = NULL; - bool hasAggCols = false; - - if ((ac = dynamic_cast(srcp.get())) != NULL) + // Not an Aggregate + // simple column selected + if (sc != NULL) { - if (ac->aggColumnList().size() > 0) - hasAggCols = true; + // one column only need project once + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. 
+ if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. + else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } } - else if ((fc = dynamic_cast(srcp.get())) != NULL) - { - if (fc->aggColumnList().size() > 0) - hasAggCols = true; - } - else if (dynamic_cast(srcp.get()) != NULL) - { - std::ostringstream errmsg; - errmsg << "Invalid aggregate function nesting."; - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - else if ((wc = dynamic_cast(srcp.get())) == NULL) - { - std::ostringstream errmsg; - errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - - uint64_t eid = srcp.get()->expressionId(); - ct = srcp.get()->resultType(); - TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); - tupleKey = ti.key; - - if (hasAggCols) - jobInfo.expressionVec.push_back(tupleKey); - } - - // add to project list - vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - - if (keyIt == projectKeys.end()) - { - RetColsVector::iterator it = pcv.end(); - - if (doDistinct) - it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); else - it = pcv.insert(pcv.end(), srcp); - - projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); - } - else if (doDistinct) // @bug4250, move forward distinct column if necessary. 
- { - uint32_t pos = distance(projectKeys.begin(), keyIt); - - if (pos >= lastGroupByPos) { - pcv[pos] = pcv[lastGroupByPos]; - pcv[lastGroupByPos] = srcp; - projectKeys[pos] = projectKeys[lastGroupByPos]; - projectKeys[lastGroupByPos] = tupleKey; - lastGroupByPos++; + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - } - if (doDistinct && dictOid > 0) - tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - // remember the columns to be returned - jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + if (keyIt == projectKeys.end()) + { + RetColsVector::iterator it = pcv.end(); - if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) - jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + if (doDistinct) + it = pcv.insert(pcv.begin() + 
lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // bug 1499 distinct processing, save unique distinct columns - if (doDistinct && - (jobInfo.distinctColVec.end() == - find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) - { - jobInfo.distinctColVec.push_back(tupleKey); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); + + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } + + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 9e23ac17b..0f981e68f 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -164,6 +164,9 @@ inline RowAggFunctionType functionIdMap(int planFuncId) case AggregateColumn::UDAF: return ROWAGG_UDAF; + case AggregateColumn::MULTI_PARM: + return ROWAGG_MULTI_PARM; + default: return ROWAGG_FUNCT_UNDEFINE; } @@ -849,7 +852,6 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) if (ac->aggOp() == ROWAGG_UDAF) { UDAFColumn* udafc = dynamic_cast(ac); - if (udafc) { constAggDataVec.push_back( @@ 
-1094,8 +1096,10 @@ void TupleAggregateStep::prep1PhaseAggregate( vector functionVec; uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); - uint32_t projColsUDAFIndex = 0; - + // For UDAF + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; + UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function map avgFuncMap; @@ -1136,6 +1140,7 @@ void TupleAggregateStep::prep1PhaseAggregate( // populate the aggregate rowgroup AGG_MAP aggFuncMap; + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -1153,8 +1158,9 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(ti.dtype); widthAgg.push_back(ti.width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - aggOp, stats, 0, i, jobInfo.cntStarPos)); + aggOp, stats, 0, outIdx, jobInfo.cntStarPos)); functionVec.push_back(funct); + ++outIdx; continue; } @@ -1170,9 +1176,10 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(ti.dtype); widthAgg.push_back(width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - aggOp, stats, 0, i, -1)); + aggOp, stats, 0, outIdx, -1)); functionVec.push_back(funct); + ++outIdx; continue; } @@ -1218,16 +1225,17 @@ void TupleAggregateStep::prep1PhaseAggregate( widthAgg.push_back(width[colProj]); if (groupBy[it->second]->fOutputColumnIndex == (uint32_t) - 1) - groupBy[it->second]->fOutputColumnIndex = i; + groupBy[it->second]->fOutputColumnIndex = outIdx; else functionVec.push_back(SP_ROWAGG_FUNC_t( new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, - i, + outIdx, groupBy[it->second]->fOutputColumnIndex))); + ++outIdx; continue; } else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), key) != @@ -1240,6 +1248,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(ti.precision); typeAgg.push_back(ti.dtype); widthAgg.push_back(ti.width); + ++outIdx; continue; } else if (jobInfo.groupConcatInfo.columns().find(key) 
!= @@ -1252,6 +1261,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + ++outIdx; continue; } else if (jobInfo.windowSet.find(key) != jobInfo.windowSet.end()) @@ -1263,6 +1273,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + ++outIdx; continue; } else @@ -1283,31 +1294,27 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); // Create a RowAggFunctionCol (UDAF subtype) with the context. 
- funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, outIdx)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, outIdx)); } functionVec.push_back(funct); @@ -1468,11 +1475,9 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } - pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); - // Return column oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(key); @@ -1480,9 +1485,42 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(udafFuncCol->fUDAFContext.getPrecision()); typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(key); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(width[colProj]); + // If the param is const + if (udafc) + { + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } 
+ else + { + throw QueryDataExcept("prep1PhaseAggregate: UDAF multi function with no parms", aggregateFuncErr); + } + ++udafcParamIdx; + } + break; + default: { ostringstream emsg; @@ -1512,6 +1550,11 @@ void TupleAggregateStep::prep1PhaseAggregate( { aggFuncMap.insert(make_pair(boost::make_tuple(key, aggOp, pUDAFFunc), funct->fOutputColumnIndex)); } + + if (aggOp != ROWAGG_MULTI_PARM) + { + ++outIdx; + } } // now fix the AVG function, locate the count(column) position @@ -1560,7 +1603,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(3)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec[i]->fAuxColumnIndex = lastCol++; @@ -1663,9 +1706,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); // map key = column key, operation (enum), and UDAF pointer if UDAF. AGG_MAP aggFuncMap; +// set avgSet; + list multiParmIndexes; + + // fOR udaf + UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; - set avgSet; - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; // for count column of average function map avgFuncMap, avgDistFuncMap; @@ -1675,7 +1723,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation map projColPosMap; @@ -1814,9 +1862,9 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // skip sum / count(column) if avg is also selected - if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && - (avgSet.find(aggKey) != avgSet.end())) - continue; +// if 
((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && +// (avgSet.find(aggKey) != avgSet.end())) +// continue; if (aggOp == ROWAGG_DISTINCT_SUM || aggOp == ROWAGG_DISTINCT_AVG || @@ -1829,12 +1877,12 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; if (udafc) { @@ -1843,12 +1891,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAgg)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -2043,7 +2089,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -2054,7 +2100,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); ++colAgg; - // UDAF Dummy holder for UserData struct + // Column for index of UDAF UserData struct oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(aggKey); scaleAgg.push_back(0); @@ -2062,9 +2108,44 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(CalpontSystemCatalog::UBIGINT); widthAgg.push_back(sizeof(uint64_t)); 
funct->fAuxColumnIndex = colAgg++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(aggKey); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(widthProj[colProj]); + multiParmIndexes.push_back(colAgg); + ++colAgg; + // If the param is const + if (udafc) + { + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep1PhaseDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); + } + ++udafcParamIdx; + } + break; + default: { ostringstream emsg; @@ -2101,7 +2182,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. 
AGG_MAP aggDupFuncMap; - pUDAFFunc = NULL; + projColsUDAFIdx = 0; + int64_t multiParms = 0; // copy over the groupby vector // update the outputColumnIndex if returned @@ -2111,16 +2193,25 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( groupByNoDist.push_back(groupby); aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - + // locate the return column position in aggregated rowgroup + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { + udafc = NULL; pUDAFFunc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colAgg = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on final agg.: Extra parms for an aggregate have no work there. + ++multiParms; + continue; + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -2146,6 +2237,25 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } } + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } + } + switch (aggOp) { case ROWAGG_DISTINCT_AVG: @@ -2396,7 +2506,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep1PhaseDistinctAggregate: " << emsg << " oid=" @@ -2420,7 +2530,7 @@ void 
TupleAggregateStep::prep1PhaseDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -2430,13 +2540,20 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (dupGroupbyIndex != -1) functionVec2.push_back(SP_ROWAGG_FUNC_t( new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } - - // update the aggregate function vector else { - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + // update the aggregate function vector + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, outIdx)); + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, outIdx)); + } if (aggOp == ROWAGG_COUNT_NO_OP) funct->fAuxColumnIndex = colAgg; @@ -2472,6 +2589,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -2489,7 +2607,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -2549,7 +2667,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep1PhaseDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec2[i]->fAuxColumnIndex = 
lastCol++; @@ -2704,6 +2822,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -2711,6 +2834,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[i].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } if (returnedColVec[k].first != distinctColKey) continue; @@ -2731,7 +2859,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -2750,9 +2878,15 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } // search non-distinct functions in functionVec vector::iterator it = functionVec2.begin(); @@ -2768,7 +2902,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if ((f->fOutputColumnIndex == k) && @@ -2790,7 +2924,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + 
f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -2843,7 +2977,11 @@ void TupleAggregateStep::prep2PhasesAggregate( vector > aggColVec; set avgSet; vector >& returnedColVec = jobInfo.returnedColVec; - uint32_t projColsUDAFIndex = 0; + // For UDAF + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2886,14 +3024,13 @@ void TupleAggregateStep::prep2PhasesAggregate( uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3022,13 +3159,11 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3036,12 +3171,10 @@ void TupleAggregateStep::prep2PhasesAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw 
logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -3240,7 +3373,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } oidsAggPm.push_back(oidsProj[colProj]); @@ -3258,9 +3391,43 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); widthAggPm.push_back(bigUintWidth); funct->fAuxColumnIndex = colAggPm++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + colAggPm++; + // If the param is const + if (udafc) + { + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep2PhasesAggregate: UDAF multi function with no parms", aggregateFuncErr); + } + ++udafcParamIdx; + } + break; + default: { ostringstream emsg; @@ -3283,6 +3450,7 @@ void TupleAggregateStep::prep2PhasesAggregate( map avgFuncMap; AGG_MAP aggDupFuncMap; + projColsUDAFIdx = 0; // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3292,6 +3460,8 @@ void TupleAggregateStep::prep2PhasesAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx 
= 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { uint32_t retKey = returnedColVec[i].first; @@ -3299,15 +3469,33 @@ void TupleAggregateStep::prep2PhasesAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colPm = -1; - // Is this a UDAF? use the function as part of the key. - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + continue; + } + // Is this a UDAF? use the function as part of the key. + pUDAFFunc = NULL; + udafc = NULL; if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); @@ -3408,7 +3596,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep2PhasesAggregate: " << emsg << " oid=" @@ -3430,7 +3618,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3441,7 +3629,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if 
(jobInfo.distinctColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3450,22 +3638,19 @@ void TupleAggregateStep::prep2PhasesAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3500,6 +3685,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnedColVec[i].second == AggregateColumn::AVG) avgFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // now fix the AVG function, locate the count(column) position @@ -3517,7 +3703,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3545,7 +3731,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -3624,7 
+3810,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector > aggColVec, aggNoDistColVec; set avgSet, avgDistSet; vector >& returnedColVec = jobInfo.returnedColVec; - uint32_t projColsUDAFIndex = 0; + // For UDAF + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -3691,18 +3881,18 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector groupByPm, groupByUm, groupByNoDist; vector functionVecPm, functionNoDistVec, functionVecUm; + list multiParmIndexes; uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3838,13 +4028,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3853,10 +4041,9 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw 
logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -4050,7 +4237,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -4069,9 +4256,44 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); widthAggPm.push_back(sizeof(uint64_t)); funct->fAuxColumnIndex = colAggPm++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + multiParmIndexes.push_back(colAggPm); + colAggPm++; + // If the param is const + if (udafc) + { + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep2PhasesDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); + } + ++udafcParamIdx; + } + break; + default: { ostringstream emsg; @@ -4087,6 +4309,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // associate the columns between the aggregate RGs on PM and UM without distinct aggregator // populated the returned columns { + int64_t multiParms = 0; + for (uint32_t idx = 0; idx < groupByPm.size(); idx++) { SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(idx, idx)); @@ -4098,16 +4322,27 @@ void 
TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; - // UDAF support + if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } + if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); + if (!udafFuncCol) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + } funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fOutputColumnIndex-multiParms, + udafFuncCol->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); + pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); } else { @@ -4115,19 +4350,27 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fAggFunction, funcPm->fStatsFunction, funcPm->fOutputColumnIndex, - funcPm->fOutputColumnIndex, - funcPm->fAuxColumnIndex)); + funcPm->fOutputColumnIndex-multiParms, + funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); + pUDAFFunc = NULL; } } - posAggUm = posAggPm; - oidsAggUm = oidsAggPm; - keysAggUm = keysAggPm; - scaleAggUm = scaleAggPm; - precisionAggUm = precisionAggPm; - widthAggUm = widthAggPm; - typeAggUm = typeAggPm; + // Copy over the PM arrays to the UM. Skip any that are a multi-parm entry. 
+ for (uint32_t idx = 0; idx < oidsAggPm.size(); ++idx) + { + if (find (multiParmIndexes.begin(), multiParmIndexes.end(), idx ) != multiParmIndexes.end()) + { + continue; + } + oidsAggUm.push_back(oidsAggPm[idx]); + keysAggUm.push_back(keysAggPm[idx]); + scaleAggUm.push_back(scaleAggPm[idx]); + precisionAggUm.push_back(precisionAggPm[idx]); + widthAggUm.push_back(widthAggPm[idx]); + typeAggUm.push_back(typeAggPm[idx]); + } } @@ -4137,6 +4380,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; + projColsUDAFIdx = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4150,15 +4398,44 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { pUDAFFunc = NULL; + udafc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colUm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } + + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == 
jobInfo.projectionCols.end()) + { + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -4292,6 +4569,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( precisionAggDist.push_back(precisionAggUm[colUm]); typeAggDist.push_back(typeAggUm[colUm]); widthAggDist.push_back(widthAggUm[colUm]); + colUm -= multiParms; } // not a direct hit -- a returned column is not already in the RG from PMs @@ -4328,8 +4606,16 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(retKey); scaleAggDist.push_back(0); - precisionAggDist.push_back(19); - typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + if (isUnsigned(typeAggUm[colUm])) + { + precisionAggDist.push_back(20); + typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + } + else + { + precisionAggDist.push_back(19); + typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + } widthAggDist.push_back(bigIntWidth); } } @@ -4377,7 +4663,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep2PhasesDistinctAggregate: " << emsg << " oid=" @@ -4401,7 +4687,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -4410,23 +4696,19 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) 
functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - pUDAFFunc = udafc->getContext().getFunction(); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4463,6 +4745,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -4480,7 +4763,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4540,7 +4823,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(5)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -4687,6 +4970,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. 
+ // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -4694,6 +4982,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[i].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } if (returnedColVec[k].first != distinctColKey) continue; @@ -4715,7 +5008,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -4732,9 +5025,15 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } // search non-distinct functions in functionVec vector::iterator it = functionVecUm.begin(); @@ -4752,7 +5051,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK || @@ -4773,7 +5072,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } diff --git a/dbcon/joblist/windowfunctionstep.cpp b/dbcon/joblist/windowfunctionstep.cpp index 4d24f0b4b..2a93f680b 100644 --- a/dbcon/joblist/windowfunctionstep.cpp +++ 
b/dbcon/joblist/windowfunctionstep.cpp @@ -569,6 +569,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) for (RetColsVector::iterator i = jobInfo.windowCols.begin(); i < jobInfo.windowCols.end(); i++) { + bool isUDAF = false; // window function type WindowFunctionColumn* wc = dynamic_cast(i->get()); uint64_t ridx = getColumnIndex(*i, colIndexMap, jobInfo); // result index @@ -590,6 +591,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // if (boost::iequals(wc->functionName(),"UDAF_FUNC") if (wc->functionName() == "UDAF_FUNC") { + isUDAF = true; ++wfsUserFunctionCount; } @@ -646,10 +648,13 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // column type for functor templates int ct = 0; + if (isUDAF) + { + ct = wc->getUDAFContext().getResultType(); + } // make sure index is in range - if (fields.size() > 1 && fields[1] >= 0 && static_cast(fields[1]) < types.size()) + else if (fields.size() > 1 && fields[1] >= 0 && static_cast(fields[1]) < types.size()) ct = types[fields[1]]; - // workaround for functions using "within group (order by)" syntax string fn = boost::to_upper_copy(wc->functionName()); diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 271508f42..8df06c6b4 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4038,6 +4038,10 @@ ParseTree* buildParseTree(Item_func* item, gp_walk_info& gwi, bool& nonSupport) ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { + // MCOL-1201 For UDAnF multiple parameters + vector selCols; + vector orderCols; + if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -4054,6 +4058,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // N.B. argument_count() is the # of formal parms to the agg fcn. 
InifniDB only supports 1 argument // TODO: Support more than one parm +#if 0 if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC && isp->sum_func() != Item_sum::UDF_SUM_FUNC) { @@ -4061,7 +4066,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG); return NULL; } - +#endif AggregateColumn* ac = NULL; if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) @@ -4084,9 +4089,14 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { gwi.fatalParseError = true; gwi.parseErrorText = "Non supported aggregate type on the select clause"; + if (ac) + delete ac; return NULL; } + try + { + // special parsing for group_concat if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) { @@ -4103,7 +4113,11 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); if (!rc || gwi.fatalParseError) + { + if (ac) + delete ac; return NULL; + } selCols.push_back(SRCP(rc)); } @@ -4123,6 +4137,8 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) if (id->val_int() > (int)selCols.size()) { gwi.fatalParseError = true; + if (ac) + delete ac; return NULL; } @@ -4135,6 +4151,8 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) if (!rc || gwi.fatalParseError) { + if (ac) + delete ac; return NULL; } } @@ -4147,6 +4165,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) rowCol->columnVec(selCols); (dynamic_cast(ac))->orderCols(orderCols); parm.reset(rowCol); + ac->aggParms().push_back(parm); if (gc->str_separator()) { @@ -4190,16 +4209,14 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // treat as count(*) if (ac->aggOp() == AggregateColumn::COUNT) ac->aggOp(AggregateColumn::COUNT_ASTERISK); - - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + parm.reset(buildReturnedColumn(sfitemp, 
gwi, gwi.fatalParseError)); + ac->constCol(parm); break; } case Item::NULL_ITEM: { - //ac->aggOp(AggregateColumn::COUNT); parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); - //ac->functionParms(parm); ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); break; } @@ -4259,7 +4276,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) if (gwi.fatalParseError) break; - //ac->functionParms(parm); break; } @@ -4270,7 +4286,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) if (rc) { parm.reset(rc); - //ac->functionParms(parm); break; } } @@ -4296,15 +4311,25 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); } + if (ac) + delete ac; return NULL; } + if (parm) + { + // MCOL-1201 multi-argument aggregate + ac->aggParms().push_back(parm); + } } } - if (parm) + // Get result type + // Modified for MCOL-1201 multi-argument aggregate + if (ac->aggParms().size() > 0) { - ac->functionParms(parm); - + // These are all one parm functions, so we can safely + // use the first parm for result type. + parm = ac->aggParms()[0]; if (isp->sum_func() == Item_sum::AVG_FUNC || isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) { @@ -4431,6 +4456,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) } else { + // UDAF result type will be set below. ac->resultType(parm->resultType()); } } @@ -4462,18 +4488,24 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will // be applied in ExeMgr. When the ExeMgr fix is available, this checking // will be taken out. 
+ if (isp->sum_func() != Item_sum::UDF_SUM_FUNC) + { if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) { gwi.fatalParseError = true; gwi.parseErrorText = "No project column found for aggregate function"; + if (ac) + delete ac; return NULL; } else if (ac->constCol()) { gwi.count_asterisk_list.push_back(ac); } + } // For UDAF, populate the context and call the UDAF init() function. + // The return type is (should be) set in context by init(). if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) { UDAFColumn* udafc = dynamic_cast(ac); @@ -4489,26 +4521,50 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) context.setScale(udafc->resultType().scale); context.setPrecision(udafc->resultType().precision); - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - - // Build the column type vector. For now, there is only one - colTypes.push_back(make_pair(udafc->functionParms()->alias(), udafc->functionParms()->resultType().colDataType)); + context.setParamCount(udafc->aggParms().size()); + ColumnDatum colType; + ColumnDatum colTypes[udafc->aggParms().size()]; + // Build the column type vector. 
+ // Modified for MCOL-1201 multi-argument aggregate + for (uint32_t i = 0; i < udafc->aggParms().size(); ++i) + { + const execplan::CalpontSystemCatalog::ColType& resultType + = udafc->aggParms()[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; + } // Call the user supplied init() - if (context.getFunction()->init(&context, colTypes) == mcsv1_UDAF::ERROR) + mcsv1sdk::mcsv1_UDAF* udaf = context.getFunction(); + if (!udaf) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "Aggregate Function " + context.getName() + " doesn't exist in the ColumnStore engine"; + if (ac) + delete ac; + return NULL; + } + if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR) { gwi.fatalParseError = true; gwi.parseErrorText = udafc->getContext().getErrorMessage(); + if (ac) + delete ac; return NULL; } + // UDAF_OVER_REQUIRED means that this function is for Window + // Function only. Reject it here in aggregate land. if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) { gwi.fatalParseError = true; gwi.parseErrorText = logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, context.getName()); + if (ac) + delete ac; return NULL; } @@ -4521,7 +4577,24 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) udafc->resultType(ct); } } - + } + catch (std::logic_error e) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "error building Aggregate Function: "; + gwi.parseErrorText += e.what(); + if (ac) + delete ac; + return NULL; + } + catch (...) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "error building Aggregate Function: Unspecified exception"; + if (ac) + delete ac; + return NULL; + } return ac; } @@ -4674,6 +4747,7 @@ void gp_walk(const Item* item, void* arg) if (isp) { + // @bug 3669. 
trim trailing spaces for the compare value if (isp->result_type() == STRING_RESULT) { String val, *str = isp->val_str(&val); @@ -4684,7 +4758,10 @@ void gp_walk(const Item* item, void* arg) cval.assign(str->ptr(), str->length()); } + size_t spos = cval.find_last_not_of(" "); + if (spos != string::npos) + cval = cval.substr(0, spos + 1); gwip->rcWorkStack.push(new ConstantColumn(cval)); break; @@ -7838,8 +7915,15 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i setError(gwi.thd, ER_INTERNAL_ERROR, gwi.parseErrorText, gwi); return ER_CHECK_NOT_IMPLEMENTED; } - - (*coliter)->functionParms(minSc); + // Replace the last (presumably constant) object with minSc + if ((*coliter)->aggParms().empty()) + { + (*coliter)->aggParms().push_back(minSc); + } + else + { + (*coliter)->aggParms()[0] = minSc; + } } std::vector::iterator funciter; @@ -8005,9 +8089,9 @@ int cp_get_group_plan(THD* thd, SCSEP& csep, cal_impl_if::cal_group_info& gi) gwi.thd = thd; int status = getGroupPlan(gwi, select_lex, csep, gi); - cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; - cerr << *csep << endl ; - cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; +// cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; +// cerr << *csep << endl ; +// cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; if (status > 0) return ER_INTERNAL_ERROR; @@ -9949,7 +10033,15 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + // Replace the last (presumably constant) object with minSc + if ((*coliter)->aggParms().empty()) + { + (*coliter)->aggParms().push_back(minSc); + } + else + { + (*coliter)->aggParms()[0] = minSc; + } } std::vector::iterator funciter; diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 21649c2d0..3d4ee6ac3 100644 --- 
a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -781,8 +781,11 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h //double double_val = *(double*)(&value); //f2->store(double_val); - if (f2->decimals() < (uint32_t)row.getScale(s)) - f2->dec = (uint32_t)row.getScale(s); + if ((f2->decimals() == DECIMAL_NOT_SPECIFIED && row.getScale(s) > 0) + || f2->decimals() < row.getScale(s)) + { + f2->dec = row.getScale(s); + } f2->store(dl); @@ -5275,8 +5278,6 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE execplan::CalpontSelectExecutionPlan::ColumnMap::iterator colMapIter; execplan::CalpontSelectExecutionPlan::ColumnMap::iterator condColMapIter; execplan::ParseTree* ptIt; - execplan::ReturnedColumn* rcIt; - for (TABLE_LIST* tl = gi.groupByTables; tl; tl = tl->next_local) { mapiter = ci->tableMap.find(tl->table); @@ -5581,19 +5582,14 @@ internal_error: */ /*********************************************************** * DESCRIPTION: - * Return a result record for each - * group_by_handler::next_row() call. + * Return a result record for each group_by_handler::next_row() call. * PARAMETERS: - * group_hand - group by handler, that preserves initial - * table and items lists. . - * table - TABLE pointer The table to save the result - * set in. + * group_hand - group by handler, that preserves initial table and items lists. . + * table - TABLE pointer The table to save the result set in. 
* RETURN: * 0 if success - * HA_ERR_END_OF_FILE if the record set has come to - * an end - * others if something went wrong whilst getting the - * result set + * HA_ERR_END_OF_FILE if the record set has come to an end + * others if something went wrong whilst getting the result set ***********************************************************/ int ha_calpont_impl_group_by_next(ha_calpont_group_by_handler* group_hand, TABLE* table) { diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 4b648cb15..8d68a6260 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -340,6 +340,7 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n ac->distinct(item_sum->has_with_distinct()); Window_spec* win_spec = wf->window_spec; SRCP srcp; + CalpontSystemCatalog::ColType ct; // For return type // arguments vector funcParms; @@ -370,18 +371,25 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n context.setColWidth(rt.colWidth); context.setScale(rt.scale); context.setPrecision(rt.precision); + context.setParamCount(funcParms.size()); + + mcsv1sdk::ColumnDatum colType; + mcsv1sdk::ColumnDatum colTypes[funcParms.size()]; // Turn on the Analytic flag so the function is aware it is being called // as a Window Function. context.setContextFlag(CONTEXT_IS_ANALYTIC); - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - // Build the column type vector. 
+ // Modified for MCOL-1201 multi-argument aggregate for (size_t i = 0; i < funcParms.size(); ++i) { - colTypes.push_back(make_pair(funcParms[i]->alias(), funcParms[i]->resultType().colDataType)); + const execplan::CalpontSystemCatalog::ColType& resultType + = funcParms[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; } // Call the user supplied init() @@ -401,7 +409,6 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n } // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; ct.colDataType = context.getResultType(); ct.colWidth = context.getColWidth(); ct.scale = context.getScale(); @@ -419,10 +426,10 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n { case Item_sum::UDF_SUM_FUNC: { - uint64_t bIgnoreNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)); - char sIgnoreNulls[18]; - sprintf(sIgnoreNulls, "%lu", bIgnoreNulls); - srcp.reset(new ConstantColumn(sIgnoreNulls, (uint64_t)bIgnoreNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 1 => RESPECT + uint64_t bRespectNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) ? 0 : 1; + char sRespectNulls[18]; + sprintf(sRespectNulls, "%lu", bRespectNulls); + srcp.reset(new ConstantColumn(sRespectNulls, (uint64_t)bRespectNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 1 => RESPECT funcParms.push_back(srcp); break; } @@ -881,11 +888,13 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n return NULL; } - ac->resultType(colType_MysqlToIDB(item_sum)); - - // bug5736. Make the result type double for some window functions when - // infinidb_double_for_decimal_math is set. - ac->adjustResultType(); + if (item_sum->sum_func() != Item_sum::UDF_SUM_FUNC) + { + ac->resultType(colType_MysqlToIDB(item_sum)); + // bug5736. 
Make the result type double for some window functions when + // infinidb_double_for_decimal_math is set. + ac->adjustResultType(); + } ac->expressionId(ci->expressionId++); diff --git a/dbcon/mysql/install_calpont_mysql.sh b/dbcon/mysql/install_calpont_mysql.sh index e8eb5b2b0..e04371549 100755 --- a/dbcon/mysql/install_calpont_mysql.sh +++ b/dbcon/mysql/install_calpont_mysql.sh @@ -84,6 +84,7 @@ CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.so'; CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemready RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemreadonly RETURNS INTEGER soname 'libcalmysql.so'; +CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libcalmysql.dll'; CREATE DATABASE IF NOT EXISTS infinidb_vtable; CREATE DATABASE IF NOT EXISTS infinidb_querystats; diff --git a/primitives/primproc/batchprimitiveprocessor.cpp b/primitives/primproc/batchprimitiveprocessor.cpp index bc56a7430..019761d39 100644 --- a/primitives/primproc/batchprimitiveprocessor.cpp +++ b/primitives/primproc/batchprimitiveprocessor.cpp @@ -1677,15 +1677,11 @@ void BatchPrimitiveProcessor::execute() } catch (logging::QueryDataExcept& qex) { - ostringstream os; - os << qex.what() << endl; - writeErrorMsg(os.str(), qex.errorCode()); + writeErrorMsg(qex.what(), qex.errorCode()); } catch (logging::DictionaryBufferOverflow& db) { - ostringstream os; - os << db.what() << endl; - writeErrorMsg(os.str(), db.errorCode()); + writeErrorMsg(db.what(), db.errorCode()); } catch (scalar_exception& se) { @@ -1758,15 +1754,11 @@ void BatchPrimitiveProcessor::execute() } catch (IDBExcept& iex) { - ostringstream os; - os << iex.what() << endl; - writeErrorMsg(os.str(), iex.errorCode(), true, false); + writeErrorMsg(iex.what(), iex.errorCode(), true, false); } catch (const std::exception& ex) { - ostringstream os; - os << ex.what() << endl; - writeErrorMsg(os.str(), logging::batchPrimitiveProcessorErr); + 
writeErrorMsg(ex.what(), logging::batchPrimitiveProcessorErr); } catch (...) { diff --git a/utils/common/any.hpp b/utils/common/any.hpp index be0ca679b..63d05d3d2 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -9,123 +9,139 @@ * http://www.boost.org/LICENSE_1_0.txt */ +#include #include +#include namespace static_any { namespace anyimpl { + struct empty_any + { + }; - struct bad_any_cast - { - }; + struct base_any_policy + { + virtual void static_delete(void** x) = 0; + virtual void copy_from_value(void const* src, void** dest) = 0; + virtual void clone(void* const* src, void** dest) = 0; + virtual void move(void* const* src, void** dest) = 0; + virtual void* get_value(void** src) = 0; + virtual size_t get_size() = 0; + }; - struct empty_any - { - }; + template + struct typed_base_any_policy : base_any_policy + { + virtual size_t get_size() + { + return sizeof(T); + } + }; - struct base_any_policy - { - virtual void static_delete(void** x) = 0; - virtual void copy_from_value(void const* src, void** dest) = 0; - virtual void clone(void* const* src, void** dest) = 0; - virtual void move(void* const* src, void** dest) = 0; - virtual void* get_value(void** src) = 0; - virtual size_t get_size() = 0; - }; + template + struct small_any_policy : typed_base_any_policy + { + virtual void static_delete(void** x) + { + } + virtual void copy_from_value(void const* src, void** dest) + { + new(dest) T(*reinterpret_cast(src)); + } + virtual void clone(void* const* src, void** dest) + { + *dest = *src; + } + virtual void move(void* const* src, void** dest) + { + *dest = *src; + } + virtual void* get_value(void** src) + { + return reinterpret_cast(src); + } + }; - template - struct typed_base_any_policy : base_any_policy - { - virtual size_t get_size() { return sizeof(T); } - }; - - template - struct small_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) { } - virtual void copy_from_value(void const* src, void** dest) - { new(dest) 
T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) { *dest = *src; } - virtual void move(void* const* src, void** dest) { *dest = *src; } - virtual void* get_value(void** src) { return reinterpret_cast(src); } - }; - - template - struct big_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) + template + struct big_any_policy : typed_base_any_policy + { + virtual void static_delete(void** x) { if (*x) - delete(*reinterpret_cast(x)); + delete(*reinterpret_cast(x)); *x = NULL; } - virtual void copy_from_value(void const* src, void** dest) + virtual void copy_from_value(void const* src, void** dest) { - *dest = new T(*reinterpret_cast(src)); + *dest = new T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) + virtual void clone(void* const* src, void** dest) { - *dest = new T(**reinterpret_cast(src)); + *dest = new T(**reinterpret_cast(src)); } - virtual void move(void* const* src, void** dest) + virtual void move(void* const* src, void** dest) { - (*reinterpret_cast(dest))->~T(); - **reinterpret_cast(dest) = **reinterpret_cast(src); + (*reinterpret_cast(dest))->~T(); + **reinterpret_cast(dest) = **reinterpret_cast(src); } - virtual void* get_value(void** src) { return *src; } - }; + virtual void* get_value(void** src) + { + return *src; + } + }; - template - struct choose_policy - { - typedef big_any_policy type; - }; + template + struct choose_policy + { + typedef big_any_policy type; + }; - template - struct choose_policy - { - typedef small_any_policy type; - }; + template + struct choose_policy + { + typedef small_any_policy type; + }; - struct any; + struct any; - /// Choosing the policy for an any type is illegal, but should never happen. - /// This is designed to throw a compiler error. - template<> - struct choose_policy - { - typedef void type; - }; + /// Choosing the policy for an any type is illegal, but should never happen. + /// This is designed to throw a compiler error. 
+ template<> + struct choose_policy + { + typedef void type; + }; - /// Specializations for small types. - #define SMALL_POLICY(TYPE) template<> struct \ - choose_policy { typedef small_any_policy type; }; + /// Specializations for small types. +#define SMALL_POLICY(TYPE) template<> struct \ + choose_policy { typedef small_any_policy type; }; - SMALL_POLICY(char); - SMALL_POLICY(signed char); - SMALL_POLICY(unsigned char); - SMALL_POLICY(signed short); - SMALL_POLICY(unsigned short); - SMALL_POLICY(signed int); - SMALL_POLICY(unsigned int); - SMALL_POLICY(signed long); - SMALL_POLICY(unsigned long); - SMALL_POLICY(signed long long); - SMALL_POLICY(unsigned long long); - SMALL_POLICY(float); - SMALL_POLICY(double); - SMALL_POLICY(bool); + SMALL_POLICY(char); + SMALL_POLICY(signed char); + SMALL_POLICY(unsigned char); + SMALL_POLICY(signed short); + SMALL_POLICY(unsigned short); + SMALL_POLICY(signed int); + SMALL_POLICY(unsigned int); + SMALL_POLICY(signed long); + SMALL_POLICY(unsigned long); + SMALL_POLICY(signed long long); + SMALL_POLICY(unsigned long long); + SMALL_POLICY(float); + SMALL_POLICY(double); + SMALL_POLICY(bool); - #undef SMALL_POLICY +#undef SMALL_POLICY - /// This function will return a different policy for each type. - template - base_any_policy* get_policy() - { - static typename choose_policy::type policy; - return &policy; - }; + /// This function will return a different policy for each type. + template + base_any_policy* get_policy() + { + static typename choose_policy::type policy; + return &policy; + }; } class any @@ -139,37 +155,40 @@ public: /// Initializing constructor. template any(const T& x) - : policy(anyimpl::get_policy()), object(NULL) + : policy(anyimpl::get_policy()), object(NULL) { assign(x); } /// Empty constructor. any() - : policy(anyimpl::get_policy()), object(NULL) - { } + : policy(anyimpl::get_policy()), object(NULL) + { + } /// Special initializing constructor for string literals. 
any(const char* x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Copy constructor. any(const any& x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Destructor. - ~any() { + ~any() + { policy->static_delete(&object); } /// Assignment function from another any. - any& assign(const any& x) { + any& assign(const any& x) + { reset(); policy = x.policy; policy->clone(&x.object, &object); @@ -178,7 +197,8 @@ public: /// Assignment function. template - any& assign(const T& x) { + any& assign(const T& x) + { reset(); policy = anyimpl::get_policy(); policy->copy_from_value(&x, &object); @@ -197,8 +217,42 @@ public: return assign(x); } + /// Less than operator for sorting + bool operator<(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) < 0 ? 1 : 0; + } + return 0; + } + + /// equal operator + bool operator==(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) == 0 ? 1 : 0; + } + return 0; + } + /// Utility functions - any& swap(any& x) { + uint8_t getHash() const + { + void* p1 = const_cast(object); + return *(uint64_t*)policy->get_value(&p1) % 4048; + } + any& swap(any& x) + { std::swap(policy, x.policy); std::swap(object, x.object); return *this; @@ -206,27 +260,32 @@ public: /// Cast operator. You can only cast to the original type. 
template - T& cast() { - if (policy != anyimpl::get_policy()) - throw anyimpl::bad_any_cast(); + T& cast() + { + if (policy != anyimpl::get_policy()) + throw std::runtime_error("static_any: type mismatch in cast"); T* r = reinterpret_cast(policy->get_value(&object)); return *r; } /// Returns true if the any contains no value. - bool empty() const { + bool empty() const + { return policy == anyimpl::get_policy(); } /// Frees any allocated memory, and sets the value to NULL. - void reset() { + void reset() + { policy->static_delete(&object); policy = anyimpl::get_policy(); } /// Returns true if the two types are the same. - bool compatible(const any& x) const { + bool compatible(const any& x) const + { return policy == x.policy; } }; + } diff --git a/utils/common/common.vpj b/utils/common/common.vpj index 69059884c..ea67e04ba 100755 --- a/utils/common/common.vpj +++ b/utils/common/common.vpj @@ -200,6 +200,7 @@ + @@ -208,6 +209,7 @@ Name="Header Files" Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + diff --git a/utils/loggingcpp/errorcodes.cpp b/utils/loggingcpp/errorcodes.cpp index 60919c906..4b4196800 100644 --- a/utils/loggingcpp/errorcodes.cpp +++ b/utils/loggingcpp/errorcodes.cpp @@ -29,7 +29,7 @@ using namespace std; namespace logging { -ErrorCodes::ErrorCodes(): fErrorCodes(), fPreamble("An unexpected condition within the query caused an internal processing error within InfiniDB. Please check the log files for more details. Additional Information: ") +ErrorCodes::ErrorCodes(): fErrorCodes(), fPreamble("An unexpected condition within the query caused an internal processing error within Columnstore. Please check the log files for more details. 
Additional Information: ") { fErrorCodes[batchPrimitiveStepErr] = "error in BatchPrimitiveStep."; fErrorCodes[tupleBPSErr] = "error in TupleBPS."; diff --git a/utils/messageqcpp/bytestream.h b/utils/messageqcpp/bytestream.h index d1a3f4988..f8453843e 100644 --- a/utils/messageqcpp/bytestream.h +++ b/utils/messageqcpp/bytestream.h @@ -35,6 +35,7 @@ #include "exceptclasses.h" #include "serializeable.h" +#include "any.hpp" class ByteStreamTestSuite; diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 8d110cfc8..bead74aff 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -215,6 +215,22 @@ inline string getStringNullValue() namespace rowgroup { +const std::string typeStr(""); +const static_any::any& RowAggregation::charTypeId((char)1); +const static_any::any& RowAggregation::scharTypeId((signed char)1); +const static_any::any& RowAggregation::shortTypeId((short)1); +const static_any::any& RowAggregation::intTypeId((int)1); +const static_any::any& RowAggregation::longTypeId((long)1); +const static_any::any& RowAggregation::llTypeId((long long)1); +const static_any::any& RowAggregation::ucharTypeId((unsigned char)1); +const static_any::any& RowAggregation::ushortTypeId((unsigned short)1); +const static_any::any& RowAggregation::uintTypeId((unsigned int)1); +const static_any::any& RowAggregation::ulongTypeId((unsigned long)1); +const static_any::any& RowAggregation::ullTypeId((unsigned long long)1); +const static_any::any& RowAggregation::floatTypeId((float)1); +const static_any::any& RowAggregation::doubleTypeId((double)1); +const static_any::any& RowAggregation::strTypeId(typeStr); + KeyStorage::KeyStorage(const RowGroup& keys, Row** tRow) : tmpRow(tRow), rg(keys) { RGData data(rg); @@ -691,7 +707,8 @@ RowAggregation::RowAggregation(const vector& rowAggGroupByCol RowAggregation::RowAggregation(const RowAggregation& rhs): fAggMapPtr(NULL), fRowGroupOut(NULL), fTotalRowCount(0), 
fMaxTotalRowCount(AGG_ROWGROUP_SIZE), - fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0) + fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0), + fRGContext(rhs.fRGContext) { //fGroupByCols.clear(); //fFunctionCols.clear(); @@ -756,7 +773,6 @@ void RowAggregation::addRowGroup(const RowGroup* pRows, vector& in { // this function is for threaded aggregation, which is for group by and distinct. // if (countSpecial(pRows)) - Row rowIn; pRows->initRow(&rowIn); @@ -790,7 +806,7 @@ void RowAggregation::setJoinRowGroups(vector* pSmallSideRG, RowGroup* } //------------------------------------------------------------------------------ -// For UDAF, we need to sometimes start a new context. +// For UDAF, we need to sometimes start a new fRGContext. // // This will be called any number of times by each of the batchprimitiveprocessor // threads on the PM and by multple threads on the UM. It must remain @@ -801,29 +817,29 @@ void RowAggregation::resetUDAF(uint64_t funcColID) // Get the UDAF class pointer and store in the row definition object. RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColID].get()); - // resetUDAF needs to be re-entrant. Since we're modifying the context object - // by creating a new userData, we need a local copy. The copy constructor - // doesn't copy userData. - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + // RowAggregation and it's functions need to be re-entrant which means + // each instance (thread) needs its own copy of the context object. + // Note: operator=() doesn't copy userData. + fRGContext = rowUDAF->fUDAFContext; // Call the user reset for the group userData. Since, at this point, // context's userData will be NULL, reset will generate a new one. 
mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->reset(&rgContext); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } fRow.setUserDataStore(fRowGroupOut->getRGData()->getUserDataStore()); - fRow.setUserData(rgContext, - rgContext.getUserDataSP(), - rgContext.getUserDataSize(), + fRow.setUserData(fRGContext, + fRGContext.getUserDataSP(), + fRGContext.getUserDataSize(), rowUDAF->fAuxColumnIndex); - rgContext.setUserData(NULL); // Prevents calling deleteUserData on the context. + fRGContext.setUserData(NULL); // Prevents calling deleteUserData on the fRGContext. } //------------------------------------------------------------------------------ @@ -873,7 +889,6 @@ void RowAggregation::initialize() } } - // Save the RowGroup data pointer fResultDataVec.push_back(fRowGroupOut->getRGData()); @@ -1658,10 +1673,11 @@ void RowAggregation::updateEntry(const Row& rowIn) { for (uint64_t i = 0; i < fFunctionCols.size(); i++) { - int64_t colIn = fFunctionCols[i]->fInputColumnIndex; - int64_t colOut = fFunctionCols[i]->fOutputColumnIndex; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[i]; + int64_t colIn = pFunctionCol->fInputColumnIndex; + int64_t colOut = pFunctionCol->fOutputColumnIndex; - switch (fFunctionCols[i]->fAggFunction) + switch (pFunctionCol->fAggFunction) { case ROWAGG_COUNT_COL_NAME: @@ -1675,7 +1691,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_MIN: case ROWAGG_MAX: case ROWAGG_SUM: - doMinMaxSum(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction); + doMinMaxSum(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; case ROWAGG_AVG: @@ -1692,7 +1708,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_BIT_OR: case ROWAGG_BIT_XOR: { - doBitOp(rowIn, 
colIn, colOut, fFunctionCols[i]->fAggFunction); + doBitOp(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; } @@ -1707,17 +1723,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF); - } - else - { - throw logic_error("(3)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colOut + 1, i); break; } @@ -1725,7 +1731,7 @@ void RowAggregation::updateEntry(const Row& rowIn) { std::ostringstream errmsg; errmsg << "RowAggregation: function (id = " << - (uint64_t) fFunctionCols[i]->fAggFunction << ") is not supported."; + (uint64_t) pFunctionCol->fAggFunction << ") is not supported."; cerr << errmsg.str() << endl; throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); break; @@ -1996,132 +2002,252 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu fRow.setLongDoubleField(fRow.getLongDoubleField(colAux + 1) + valIn * valIn, colAux + 1); } -void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) +void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, + int64_t colAux, uint64_t& funcColsIdx) { - std::vector valsIn; - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupIn.getColTypes()[colIn]; - std::vector dataFlags; - - // Get the context for this rowGroup. Make a copy so we're thread safe. 
- mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); - - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - - if (isNull(&fRowGroupIn, rowIn, colIn) == true) + uint32_t paramCount = fRGContext.getParameterCount(); + // The array of parameters to be sent to the UDAF + mcsv1sdk::ColumnDatum valsIn[paramCount]; + uint32_t dataFlags[paramCount]; + ConstantColumn* cc; + bool bIsNull = false; + execplan::CalpontSystemCatalog::ColDataType colDataType; + for (uint32_t i = 0; i < paramCount; ++i) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + // If UDAF_IGNORE_NULLS is on, bIsNull gets set the first time + // we find a null. We still need to eat the rest of the parameters + // to sync updateEntry + if (bIsNull) { - return; + ++funcColsIdx; + continue; + } + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + + // If this particular parameter is a constant, then we need + // to access the constant value rather than a row value.
+ cc = NULL; + if (pFunctionCol->fpConstCol) + { + cc = dynamic_cast(pFunctionCol->fpConstCol.get()); } - flag |= mcsv1sdk::PARAM_IS_NULL; - } - - flags.push_back(flag); - rgContext.setDataFlags(&flags); - - mcsv1sdk::ColumnDatum datum; - - if (!rgContext.isParamNull(0)) - { - switch (colDataType) + if ((cc && cc->type() == ConstantColumn::NULLDATA) + || (!cc && isNull(&fRowGroupIn, rowIn, colIn) == true)) { - case execplan::CalpontSystemCatalog::TINYINT: - case execplan::CalpontSystemCatalog::SMALLINT: - case execplan::CalpontSystemCatalog::MEDINT: - case execplan::CalpontSystemCatalog::INT: - case execplan::CalpontSystemCatalog::BIGINT: - case execplan::CalpontSystemCatalog::DECIMAL: - case execplan::CalpontSystemCatalog::UDECIMAL: + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - datum.scale = fRowGroupIn.getScale()[colIn]; - datum.precision = fRowGroupIn.getPrecision()[colIn]; - break; + bIsNull = true; + ++funcColsIdx; + continue; } + dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; + } - case execplan::CalpontSystemCatalog::UTINYINT: - case execplan::CalpontSystemCatalog::USMALLINT: - case execplan::CalpontSystemCatalog::UMEDINT: - case execplan::CalpontSystemCatalog::UINT: - case execplan::CalpontSystemCatalog::UBIGINT: + if (cc) + { + colDataType = cc->resultType().colDataType; + } + else + { + colDataType = fRowGroupIn.getColTypes()[colIn]; + } + if (!(dataFlags[i] & mcsv1sdk::PARAM_IS_NULL)) + { + switch (colDataType) { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::BIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + if (cc) + { + 
datum.columnData = cc->getIntVal(const_cast(rowIn), bIsNull); + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + } + break; + } + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + { + datum.dataType = colDataType; + if (cc) + { + datum.columnData = cc->getDecimalVal(const_cast(rowIn), bIsNull).value; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + } + break; + } - case execplan::CalpontSystemCatalog::DOUBLE: - case execplan::CalpontSystemCatalog::UDOUBLE: - { - datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; - datum.columnData = rowIn.getDoubleField(colIn); - break; - } + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UBIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + if (cc) + { + datum.columnData = cc->getUintVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } + break; + } - case execplan::CalpontSystemCatalog::FLOAT: - case execplan::CalpontSystemCatalog::UFLOAT: - { - datum.dataType = execplan::CalpontSystemCatalog::FLOAT; - datum.columnData = rowIn.getFloatField(colIn); - break; - } + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + { + datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; + if (cc) + { + datum.columnData = cc->getDoubleVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = 
rowIn.getDoubleField(colIn); + } + break; + } - case execplan::CalpontSystemCatalog::DATE: - case execplan::CalpontSystemCatalog::DATETIME: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + { + datum.dataType = execplan::CalpontSystemCatalog::FLOAT; + if (cc) + { + datum.columnData = cc->getFloatVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getFloatField(colIn); + } + break; + } - case execplan::CalpontSystemCatalog::TIME: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - break; - } + case execplan::CalpontSystemCatalog::DATE: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + if (cc) + { + datum.columnData = cc->getDateIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } + break; + } + case execplan::CalpontSystemCatalog::DATETIME: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + if (cc) + { + datum.columnData = cc->getDatetimeIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } + break; + } - case execplan::CalpontSystemCatalog::CHAR: - case execplan::CalpontSystemCatalog::VARCHAR: - case execplan::CalpontSystemCatalog::TEXT: - case execplan::CalpontSystemCatalog::VARBINARY: - case execplan::CalpontSystemCatalog::CLOB: - case execplan::CalpontSystemCatalog::BLOB: - { - datum.dataType = colDataType; - datum.columnData = rowIn.getStringField(colIn); - break; - } + case execplan::CalpontSystemCatalog::TIME: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + if (cc) + { + datum.columnData = cc->getTimeIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getIntField(colIn); + } + break; + } - default: - { - std::ostringstream errmsg; - errmsg 
<< "RowAggregation " << rgContext.getName() << - ": No logic for data type: " << colDataType; - throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); - break; + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + { + datum.dataType = colDataType; + if (cc) + { + datum.columnData = cc->getStrVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getStringField(colIn); + } + break; + } + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation " << fRGContext.getName() << + ": No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } } } - } - valsIn.push_back(datum); + // MCOL-1201: If there are multiple parameters, the next fFunctionCols + // will have the column used. By incrementing the funcColsIdx (passed by + // ref, we also increment the caller's index. + if (fFunctionCols.size() > funcColsIdx + 1 + && fFunctionCols[funcColsIdx+1]->fAggFunction == ROWAGG_MULTI_PARM) + { + ++funcColsIdx; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; + colIn = pFunctionCol->fInputColumnIndex; + colOut = pFunctionCol->fOutputColumnIndex; + } + else + { + break; + } + } // The intermediate values are stored in userData referenced by colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setDataFlags(dataFlags); + fRGContext.setUserData(fRow.getUserData(colAux)); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->nextValue(&rgContext, valsIn); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColsIdx].get()); rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -2218,6 +2344,7 @@ RowAggregationUM::RowAggregationUM(const RowAggregationUM& rhs) : fHasAvg(rhs.fHasAvg), fKeyOnHeap(rhs.fKeyOnHeap), fHasStatsFunc(rhs.fHasStatsFunc), + fHasUDAF(rhs.fHasUDAF), fExpression(rhs.fExpression), fTotalMemUsage(rhs.fTotalMemUsage), fRm(rhs.fRm), @@ -2415,17 +2542,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); - } - else - { - throw logic_error("(5)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } @@ -2585,22 +2702,6 @@ void RowAggregationUM::calculateAvgColumns() // Sets the value from valOut into column colOut, performing any conversions. 
void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) { - static const static_any::any& charTypeId((char)1); - static const static_any::any& scharTypeId((signed char)1); - static const static_any::any& shortTypeId((short)1); - static const static_any::any& intTypeId((int)1); - static const static_any::any& longTypeId((long)1); - static const static_any::any& llTypeId((long long)1); - static const static_any::any& ucharTypeId((unsigned char)1); - static const static_any::any& ushortTypeId((unsigned short)1); - static const static_any::any& uintTypeId((unsigned int)1); - static const static_any::any& ulongTypeId((unsigned long)1); - static const static_any::any& ullTypeId((unsigned long long)1); - static const static_any::any& floatTypeId((float)1); - static const static_any::any& doubleTypeId((double)1); - static const std::string typeStr(""); - static const static_any::any& strTypeId(typeStr); - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; if (valOut.empty()) @@ -2609,6 +2710,179 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) return; } + int64_t intOut = 0; + uint64_t uintOut = 0; + float floatOut = 0.0; + double doubleOut = 0.0; + ostringstream oss; + std::string strOut; + + bool bSetSuccess = false; + switch (colDataType) + { + case execplan::CalpontSystemCatalog::BIT: + case execplan::CalpontSystemCatalog::TINYINT: + if (valOut.compatible(charTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(scharTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<1>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + if (valOut.compatible(shortTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<2>(intOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::INT: 
+ if (valOut.compatible(uintTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(longTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<4>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + if (valOut.compatible(llTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<8>(intOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UTINYINT: + if (valOut.compatible(ucharTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<1>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + if (valOut.compatible(ushortTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<2>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UINT: + if (valOut.compatible(uintTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<4>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UBIGINT: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + if (valOut.compatible(floatTypeId)) + { + floatOut = valOut.cast(); + fRow.setFloatField(floatOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + if (valOut.compatible(doubleTypeId)) + { 
+ doubleOut = valOut.cast(); + fRow.setDoubleField(doubleOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setStringField(strOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setVarBinaryField(strOut, colOut); + bSetSuccess = true; + } + break; + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation: No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } + if (!bSetSuccess) + { + SetUDAFAnyValue(valOut, colOut); + } +} + +void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut) +{ + execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; + // This may seem a bit convoluted. Users shouldn't return a type // that they didn't set in mcsv1_UDAF::init(), but this // handles whatever return type is given and casts @@ -2814,7 +3088,7 @@ void RowAggregationUM::calculateUDAFColumns() continue; rowUDAF = dynamic_cast(fFunctionCols[i].get()); - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + fRGContext = rowUDAF->fUDAFContext; int64_t colOut = rowUDAF->fOutputColumnIndex; int64_t colAux = rowUDAF->fAuxColumnIndex; @@ -2826,26 +3100,26 @@ void RowAggregationUM::calculateUDAFColumns() fRowGroupOut->getRow(j, &fRow); // Turn the NULL flag off. We can't know NULL at this point - rgContext.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF evaluate function mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->evaluate(&rgContext, valOut); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); } - rgContext.setUserData(NULL); + fRGContext.setUserData(NULL); } } @@ -3116,54 +3390,60 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u { // For a NULL constant, call nextValue with NULL and then evaluate. bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. 
- rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } - +#if 0 + uint32_t dataFlags[fRGContext.getParameterCount()]; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + { + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + } +#endif // Turn the NULL and CONSTANT flags on. - std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a dummy datum - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = execplan::CalpontSystemCatalog::BIGINT; datum.columnData = 0; - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), 
logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3460,30 +3740,28 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData case ROWAGG_UDAF: { bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. - rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Turn the CONSTANT flags on. 
- std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a datum item for sending to UDAF - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = (CalpontSystemCatalog::ColDataType)colDataType; switch (colDataType) @@ -3567,27 +3845,27 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData break; } - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3802,17 +4080,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); - } - else - { - throw logic_error("(6)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } @@ 
-4010,46 +4278,45 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // colAux(in) - Where the UDAF userdata resides // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance //------------------------------------------------------------------------------ -void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) +void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, + int64_t colAux, uint64_t& funcColsIdx) { static_any::any valOut; - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); // Get the user data - boost::shared_ptr userData = rowIn.getUserData(colIn + 1); + boost::shared_ptr userDataIn = rowIn.getUserData(colIn+1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. - std::vector flags; - uint32_t flag = 0; + uint32_t flags[1]; - if (!userData) + flags[0] = 0; + if (!userDataIn) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { return; } // Turn on NULL flags - flag |= mcsv1sdk::PARAM_IS_NULL; + flags[0] |= mcsv1sdk::PARAM_IS_NULL; } - flags.push_back(flag); - rgContext.setDataFlags(&flags); + fRGContext.setDataFlags(flags); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF subEvaluate method mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->subEvaluate(&rgContext, userData.get()); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userDataIn.get()); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColsIdx].get()); rowUDAF->bInterrupted = true; - throw logging::IDBExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -4242,17 +4509,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); - } - else - { - throw logic_error("(7)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index b6294f193..14e4313cf 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -50,6 +50,7 @@ #include "stlpoolallocator.h" #include "returnedcolumn.h" #include "mcsv1_udaf.h" +#include "constantcolumn.h" // To do: move code that depends on joblist to a proper subsystem. namespace joblist @@ -110,6 +111,9 @@ enum RowAggFunctionType // User Defined Aggregate Function ROWAGG_UDAF, + // If an Aggregate has more than one parameter, this will be used for parameters after the first + ROWAGG_MULTI_PARM, + // internal function type to avoid duplicate the work // handling ROWAGG_COUNT_NO_OP, ROWAGG_DUP_FUNCT and ROWAGG_DUP_AVG is a little different // ROWAGG_COUNT_NO_OP : count done by AVG, no need to copy @@ -197,6 +201,13 @@ struct RowAggFunctionCol // 4. 
for duplicate - point to the real aggretate column to be copied from // Set only on UM, the fAuxColumnIndex is defaulted to fOutputColumnIndex+1 on PM. uint32_t fAuxColumnIndex; + + // For UDAF that have more than one parameter and some parameters are constant. + // There will be a series of RowAggFunctionCol created, one for each parameter. + // The first will be a RowUDAFFunctionCol. Subsequent ones will be RowAggFunctionCol + // with fAggFunction == ROWAGG_MULTI_PARM. Order is important. + // If this parameter is constant, that value is here. + SRCP fpConstCol; }; @@ -217,8 +228,11 @@ struct RowUDAFFunctionCol : public RowAggFunctionCol inputColIndex, outputColIndex, auxColIndex), bInterrupted(false) {} - RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, - rhs.fInputColumnIndex, rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), fUDAFContext(rhs.fUDAFContext) + RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : + RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, rhs.fInputColumnIndex, + rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), + fUDAFContext(rhs.fUDAFContext), + bInterrupted(false) {} virtual ~RowUDAFFunctionCol() {} @@ -235,6 +249,16 @@ inline void RowAggFunctionCol::serialize(messageqcpp::ByteStream& bs) const bs << (uint8_t)fAggFunction; bs << fInputColumnIndex; bs << fOutputColumnIndex; + if (fpConstCol) + { + bs << (uint8_t)1; + fpConstCol.get()->serialize(bs); + } + else + { + bs << (uint8_t)0; + } + } inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) @@ -242,6 +266,13 @@ inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) bs >> (uint8_t&)fAggFunction; bs >> fInputColumnIndex; bs >> fOutputColumnIndex; + uint8_t t; + bs >> t; + if (t) + { + fpConstCol.reset(new ConstantColumn); + fpConstCol.get()->unserialize(bs); + } } inline void RowUDAFFunctionCol::serialize(messageqcpp::ByteStream& bs) const @@ -583,7 +614,7 @@ protected: virtual void doAvg(const 
Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); - virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); @@ -660,6 +691,25 @@ protected: //need access to rowgroup storage holding the rows to hash & ==. friend class AggHasher; friend class AggComparator; + + // We need a separate copy for each thread. + mcsv1sdk::mcsv1Context fRGContext; + + // These are handy for testing the actual type of static_any for UDAF + static const static_any::any& charTypeId; + static const static_any::any& scharTypeId; + static const static_any::any& shortTypeId; + static const static_any::any& intTypeId; + static const static_any::any& longTypeId; + static const static_any::any& llTypeId; + static const static_any::any& ucharTypeId; + static const static_any::any& ushortTypeId; + static const static_any::any& uintTypeId; + static const static_any::any& ulongTypeId; + static const static_any::any& ullTypeId; + static const static_any::any& floatTypeId; + static const static_any::any& doubleTypeId; + static const static_any::any& strTypeId; }; //------------------------------------------------------------------------------ @@ -783,6 +833,9 @@ protected: // Sets the value from valOut into column colOut, performing any conversions. void SetUDAFValue(static_any::any& valOut, int64_t colOut); + // If the datatype returned by evaluate isn't what we expect, convert. + void SetUDAFAnyValue(static_any::any& valOut, int64_t colOut); + // calculate the UDAF function all rows received. UM only function. 
void calculateUDAFColumns(); @@ -877,7 +930,7 @@ protected: void doStatistics(const Row&, int64_t, int64_t, int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); - void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx); bool countSpecial(const RowGroup* pRG) { return false; diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index e69ff4d88..01009e35a 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/udfsdk/allnull.cpp b/utils/udfsdk/allnull.cpp index b6b8d79da..247b9e28f 100644 --- a/utils/udfsdk/allnull.cpp +++ b/utils/udfsdk/allnull.cpp @@ -27,11 +27,11 @@ struct allnull_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode allnull::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { context->setUserDataSize(sizeof(allnull_data)); - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -52,8 +52,7 @@ mcsv1_UDAF::ReturnCode allnull::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { struct allnull_data* data = (struct allnull_data*)context->getUserData()->data; diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h index 86697b052..6a727caf6 100644 --- a/utils/udfsdk/allnull.h 
+++ b/utils/udfsdk/allnull.h @@ -48,7 +48,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else @@ -103,7 +102,7 @@ public: * colTypes or wrong number of arguments. Else return * mcsv1_UDAF::SUCCESS. */ - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); /** * reset() @@ -138,7 +137,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() diff --git a/utils/udfsdk/avg_mode.cpp b/utils/udfsdk/avg_mode.cpp index f39b5e402..5429183d9 100644 --- a/utils/udfsdk/avg_mode.cpp +++ b/utils/udfsdk/avg_mode.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("avg_mode() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode avg_mode::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; @@ -187,8 
+186,7 @@ mcsv1_UDAF::ReturnCode avg_mode::evaluate(mcsv1Context* context, static_any::any return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/avg_mode.h b/utils/udfsdk/avg_mode.h index 4f3442005..fba1fcdcc 100644 --- a/utils/udfsdk/avg_mode.h +++ b/utils/udfsdk/avg_mode.h @@ -18,7 +18,7 @@ /*********************************************************************** * $Id$ * -* mcsv1_UDAF.h +* avg_mode.h ***********************************************************************/ /** @@ -50,13 +50,12 @@ * is also used to describe the interface that is used for * either. */ -#ifndef HEADER_mode -#define HEADER_mode +#ifndef HEADER_avg_mode +#define HEADER_avg_mode #include #include #include -#include #ifdef _MSC_VER #include #else @@ -134,7 +133,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +168,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +244,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/avgx.cpp b/utils/udfsdk/avgx.cpp new file mode 100644 index 000000000..887a8418e --- /dev/null +++ b/utils/udfsdk/avgx.cpp @@ -0,0 +1,257 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include +#include +#include +#include "avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 1) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with other than 1 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[0].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[0].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode avgx::reset(mcsv1Context* context) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_x = valsIn[0].columnData; + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct avgx_data* outData = (struct avgx_data*)context->getUserData()->data; + struct avgx_data* inData = (struct avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + /* No rows counted (empty or all-NULL group): skip the 0/0 division, which would yield NaN; valOut stays unset (presumably surfaced as NULL by the framework; confirm). */ + if (data->cnt > 0) valOut = data->sum / (double)data->cnt; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_x = valsDropped[0].columnData; + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/avgx.h b/utils/udfsdk/avgx.h new file mode 100644 index 000000000..a830c6803 --- /dev/null +++ b/utils/udfsdk/avgx.h @@ -0,0 +1,98 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* avgx.h +***********************************************************************/ + +/** + * Columnstore interface for the avgx function + * + * + * CREATE AGGREGATE FUNCTION avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_avgx +#define HEADER_avgx + +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in context->setErrorMessage() is returned to MariaDB.
+ +// Return the avgx value of the dataset + +class avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + avgx() : mcsv1_UDAF() {}; + virtual ~avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_avgx + diff --git a/utils/udfsdk/docs/source/changelog.rst b/utils/udfsdk/docs/source/changelog.rst index fcd93d54c..1a7c749f9 100644 --- a/utils/udfsdk/docs/source/changelog.rst +++ b/utils/udfsdk/docs/source/changelog.rst @@ -5,4 +5,5 @@ Version History | Version | Date | Changes | +=========+============+=============================+ | 1.1.0α | 2017-08-25 | - First alpha release | +| 1.2.0α | 2016-05-18 | - Add multi parm support | +---------+------------+-----------------------------+ diff --git a/utils/udfsdk/docs/source/reference/ColumnDatum.rst b/utils/udfsdk/docs/source/reference/ColumnDatum.rst index dd1006363..5304a2953 100644 --- a/utils/udfsdk/docs/source/reference/ColumnDatum.rst +++ b/utils/udfsdk/docs/source/reference/ColumnDatum.rst @@ -1,3 +1,5 @@ +.. _ColumnDatum: + ColumnDatum =========== @@ -13,7 +15,7 @@ Example for int data: int myint = valIn.cast(); -For multi-paramter aggregations (not available in Columnstore 1.1), the colsIn vector of next_value() contains the ordered set of row parameters. +For multi-parameter aggregations (not available in Columnstore 1.1), the colsIn array of nextValue() contains the ordered set of row parameters. For char, varchar, text, varbinary and blob types, columnData will be std::string.
@@ -59,7 +61,7 @@ The provided values are: * - SMALLINT - A signed two byte integer * - DECIMAL - - A Columnstore Decimal value. For Columnstore 1.1, this is stored in the smallest integer type field that will hold the required precision. + - A Columnstore Decimal value. This is stored in the smallest integer type field that will hold the required precision. * - MEDINT - A signed four byte integer * - INT diff --git a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst index 1f6fa7acb..d031705d8 100644 --- a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst +++ b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst @@ -13,7 +13,7 @@ The library placed in mysql/lib is the name you use in the SQL CREATE AGGREGATE CREATE AGGREGATE FUNCTION ssq returns REAL soname 'libudf_mysql.so'; -Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures. If writing your function for other engines, you must handle allocation and de-alloaction in :ref:`function_init ` and :ref:`function_deinit ` +Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures in other engines. If writing your function for other engines, you must handle allocation and de-allocation in :ref:`function_init ` and :ref:`function_deinit ` All of the MariaDB UDF and UDAF example functions are in a single source file named udfmysql.cpp and linked into libudf_mysql.so. diff --git a/utils/udfsdk/docs/source/reference/UDAFMap.rst b/utils/udfsdk/docs/source/reference/UDAFMap.rst index 48706bab3..d3cda63f4 100644 --- a/utils/udfsdk/docs/source/reference/UDAFMap.rst +++ b/utils/udfsdk/docs/source/reference/UDAFMap.rst @@ -3,7 +3,7 @@ UDAFMap ======= -The UDAFMap is where we tell the system about our function. For Columnstore 1.1, you must manually place your function into this map.
+The UDAFMap is where we tell the system about our function. For Columnstore 1.2, you must manually place your function into this map. * open mcsv1_udaf.cpp * add your header to the #include list diff --git a/utils/udfsdk/docs/source/reference/mcsv1Context.rst b/utils/udfsdk/docs/source/reference/mcsv1Context.rst index 279220fb3..02adf57ab 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1Context.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1Context.rst @@ -150,7 +150,7 @@ Use these to determine the way your UDA(n)F was called .. c:function:: size_t getParameterCount() const; -:returns: the number of parameters to the function in the SQL query. Columnstore 1.1 only supports one parameter. +:returns: the number of parameters to the function in the SQL query. .. c:function:: bool isParamNull(int paramIdx); diff --git a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst index 73c8f6570..f75fe73fc 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst @@ -1,4 +1,4 @@ -.. _ mcsv1_udaf: +.. _mcsv1_udaf: mcsv1_UDAF ========== @@ -11,12 +11,14 @@ The base class has no data members. It is designed to be only a container for yo However, adding static const members makes sense. -For UDAF (not Wndow Functions) Aggregation takes place in three stages: +For UDAF (not Window Functions) Aggregation takes place in three stages: * Subaggregation on the PM. nextValue() * Consolodation on the UM. subevaluate() * Evaluation of the function on the UM. evaluate() +There are situations where the system makes a choice to perform all UDAF calculations on the UM. The presence of group_concat() in the query and certain joins can cause the optimizer to make this choice. + For Window Functions, all aggregation occurs on the UM, and thus the subevaluate step is skipped. There is an optional dropValue() function that may be added. * Aggregation on the UM. 
nextValue() @@ -80,17 +82,11 @@ Callback Methods .. _init: -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. - - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. +:param colTypes: A list of ColumnDatum structures. Use this to access the column types of the parameters. colTypes.columnData will be invalid. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -116,25 +112,23 @@ Callback Methods .. _nextvalue: -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. - +:param valsIn: an array representing the values to be added for each parameter for this row. + :returns: ReturnCode::ERROR or ReturnCode::SUCCESS Use context->getUserData() and type cast it to your UserData type or Simple Data Model stuct. nextValue() is called for each Window movement that passes the WHERE and HAVING clauses. The context's UserData will contain values that have been sub-aggregated to this point for the group, partition or Window Frame. nextValue is called on the PM for aggregation and on the UM for Window Functions. - When used in an aggregate, the function may not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. 
+ When used in an aggregate, the function should not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. - When used as a analytic function (Window Function), nextValue is call for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. + When used as an analytic function (Window Function), nextValue is called for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. - Since this is called for every row, it is important that this method be efficient. + Since this may be called for every row, it is important that this method be efficient. .. _subevaluate: @@ -172,13 +166,11 @@ Callback Methods .. _dropvalue: -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call -:param valsDropped: a vector representing the values to be dropped for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. +:param valsDropped: an array representing the values to be dropped for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS diff --git a/utils/udfsdk/docs/source/usage/cmakelists.rst b/utils/udfsdk/docs/source/usage/cmakelists.rst index 32a218459..a7ddacbaf 100644 --- a/utils/udfsdk/docs/source/usage/cmakelists.rst +++ b/utils/udfsdk/docs/source/usage/cmakelists.rst @@ -3,7 +3,7 @@ CMakeLists.txt ============== -For Columnstore 1.1, you compile your function by including it in the CMakeLists.txt file for the udfsdk. +For Columnstore 1.2, you compile your function by including it in the CMakeLists.txt file for the udfsdk.
You need only add the new .cpp files to the udfsdk_LIB_SRCS target list:: diff --git a/utils/udfsdk/docs/source/usage/compile.rst b/utils/udfsdk/docs/source/usage/compile.rst index e6319e45b..b96af5d80 100644 --- a/utils/udfsdk/docs/source/usage/compile.rst +++ b/utils/udfsdk/docs/source/usage/compile.rst @@ -3,7 +3,7 @@ Compile ======= -To compile your function for Columnstore 1.1, simple recompile the udfsdk directory:: +To compile your function for Columnstore 1.2, simply recompile the udfsdk directory:: cd utils/usdsdk cmake . diff --git a/utils/udfsdk/docs/source/usage/headerfile.rst b/utils/udfsdk/docs/source/usage/headerfile.rst index 720acc5be..afb043e98 100644 --- a/utils/udfsdk/docs/source/usage/headerfile.rst +++ b/utils/udfsdk/docs/source/usage/headerfile.rst @@ -5,7 +5,7 @@ Header file Usually, each UDA(n)F function will have one .h and one .cpp file plus code for the mariadb UDAF plugin which may or may not be in a separate file. It is acceptable to put a set of related functions in the same files or use separate files for each. -The easiest way to create these files is to copy them an example closest to the type of function you intend to create. +The easiest way to create these files is to copy them from an example closest to the type of function you intend to create. Your header file must have a class defined that will implement your function. This class must be derived from mcsv1_UDAF and be in the mcsv1sdk namespace. The following examples use the "allnull" UDAF. @@ -29,9 +29,9 @@ allnull uses the Simple Data Model. 
See :ref:`complexdatamodel` to see how that allnull() : mcsv1_UDAF(){}; virtual ~allnull(){}; - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); virtual ReturnCode reset(mcsv1Context* context); - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn); virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); }; diff --git a/utils/udfsdk/docs/source/usage/introduction.rst b/utils/udfsdk/docs/source/usage/introduction.rst index 6b3544a1e..19c612caa 100644 --- a/utils/udfsdk/docs/source/usage/introduction.rst +++ b/utils/udfsdk/docs/source/usage/introduction.rst @@ -3,7 +3,7 @@ mcsv1_udaf Introduction mcsv1_udaf is a C++ API for writing User Defined Aggregate Functions (UDAF) and User Defined Analytic Functions (UDAnF) for the MariaDB Columstore engine. -In Columnstore 1.1.0, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. +In Columnstore 1.2, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. The API has a number of features. The general theme is, there is a class that represents the function, there is a context under which the function operates, and there is a data store for intermediate values. @@ -18,5 +18,5 @@ The steps required to create a function are: * :ref:`Compile udfsdk `. * :ref:`Copy the compiled libraries ` to the working directories. -In 1.1.0, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. +In 1.2, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. 
diff --git a/utils/udfsdk/docs/source/usage/sourcefile.rst b/utils/udfsdk/docs/source/usage/sourcefile.rst index b7ed38a32..5c43f29e4 100644 --- a/utils/udfsdk/docs/source/usage/sourcefile.rst +++ b/utils/udfsdk/docs/source/usage/sourcefile.rst @@ -34,21 +34,17 @@ Or, if using the :ref:`complexdatamodel`, type cast the UserData to your UserDat init() ------ -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. +:param colTypes: A list of the ColumnDatum used to access column types of the parameters. In init(), the columnData member is invalid. - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - see :ref:`ColDataTypes `. In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. + see :ref:`ColumnDatum`. In Columnstore 1.2, An arbitrary number of parameters is supported. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS -The init() method is where you sanity check the input, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. +The init() method is where you sanity check the input datatypes, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. init() is the exception to type casting the UserData member of context. UserData has not been created when init() is called, so you shouldn't use it here. @@ -60,13 +56,14 @@ If you're using :ref:`simpledatamodel`, you need to set the size of the structur .. rubric:: Check parameter count and type -Each function expects a certain number of columns to entered as parameters in the SQL query. 
For columnstore 1.1, the number of parameters is limited to one. +Each function expects a certain number of columns to be entered as parameters in the SQL query. It is possible to create a UDAF that accepts a variable number of parameters. You can discover which ones were actually used in init(), and modify your function's behavior accordingly. -colTypes is a vector of each parameter name and type. The name is the colum name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your functions behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +colTypes is an array of ColumnDatum from which the type and name can be gleaned. The name is the column name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your function's behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +The actual number of parameters passed can be obtained from context->getParameterCount(). :: - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -84,7 +81,7 @@ When you create your function using the SQL CREATE FUNCTION command, you must in .. rubric:: Set width and scale -If you have secial requirements, especially if you might be dealing with decimal types:: +If you have special requirements, especially if you might be dealing with decimal types:: context->setColWidth(8); context->setScale(context->getScale()*2); @@ -117,13 +114,11 @@ This function may be called multiple times from both the UM and the PM. Make no nextValue() ----------- -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +..
c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. +:param valsIn: an array representing the values to be added for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -208,7 +203,7 @@ For AVG, you might see:: dropValue --------- -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index 349a642ec..ee08dcc07 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -36,6 +36,8 @@ UDAF_MAP UDAFMap::fm; #include "ssq.h" #include "median.h" #include "avg_mode.h" +#include "regr_avgx.h" +#include "avgx.h" UDAF_MAP& UDAFMap::getMap() { if (fm.size() > 0) @@ -52,6 +54,8 @@ UDAF_MAP& UDAFMap::getMap() fm["ssq"] = new ssq(); fm["median"] = new median(); fm["avg_mode"] = new avg_mode(); + fm["regr_avgx"] = new regr_avgx(); + fm["avgx"] = new avgx(); return fm; } @@ -115,8 +119,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const { // We don't test the per row data fields. They don't determine // if it's the same Context. 
- if (getName() != c.getName() - || fRunFlags != c.fRunFlags + if (getName() != c.getName() + ||fRunFlags != c.fRunFlags || fContextFlags != c.fContextFlags || fUserDataSize != c.fUserDataSize || fResultType != c.fResultType @@ -125,7 +129,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const || fStartFrame != c.fStartFrame || fEndFrame != c.fEndFrame || fStartConstant != c.fStartConstant - || fEndConstant != c.fEndConstant) + || fEndConstant != c.fEndConstant + || fParamCount != c.fParamCount) return false; return true; @@ -217,6 +222,7 @@ void mcsv1Context::serialize(messageqcpp::ByteStream& b) const b << (uint32_t)fEndFrame; b << fStartConstant; b << fEndConstant; + b << fParamCount; } void mcsv1Context::unserialize(messageqcpp::ByteStream& b) @@ -238,6 +244,7 @@ void mcsv1Context::unserialize(messageqcpp::ByteStream& b) fEndFrame = (WF_FRAME)frame; b >> fStartConstant; b >> fEndConstant; + b >> fParamCount; } void UserData::serialize(messageqcpp::ByteStream& bs) const diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h index d24852c28..e09228d77 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -68,7 +68,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else @@ -77,6 +76,7 @@ #include "any.hpp" #include "calpontsystemcatalog.h" #include "wf_frame.h" +#include "my_decimal_limits.h" using namespace execplan; @@ -200,12 +200,8 @@ static uint64_t CONTEXT_IS_PM __attribute__ ((unused)) = 1 << 2; // Flags that describe the contents of a specific input parameter // These will be set in context->dataFlags for each method call by the framework. // User code shouldn't use these directly -static uint64_t PARAM_IS_NULL __attribute__ ((unused)) = 1; -static uint64_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; - -// shorthand for the list of columns in the call sent to init() -// first is the actual column name and second is the data type in Columnstore. 
-typedef std::vector >COL_TYPES; +static uint32_t PARAM_IS_NULL __attribute__ ((unused)) = 1; +static uint32_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; // This is the context class that is passed to all API callbacks // The framework potentially sets data here for each invocation of @@ -269,7 +265,9 @@ public: EXPORT bool isPM(); // Parameter refinement description accessors - // valid in nextValue and dropValue + + // How many actual parameters were entered. + // valid in all calls size_t getParameterCount() const; // Determine if an input parameter is NULL @@ -298,6 +296,7 @@ public: // This only makes sense if the return type is decimal, but should be set // to (0, -1) for other types if the inout is decimal. // valid in init() + // Set the scale to DECIMAL_NOT_SPECIFIED if you want a floating decimal. EXPORT bool setScale(int32_t scale); EXPORT bool setPrecision(int32_t precision); @@ -372,7 +371,7 @@ private: int32_t fResultscale; // For scale, the number of digits to the right of the decimal int32_t fResultPrecision; // The max number of digits allowed in the decimal value std::string errorMsg; - std::vector* dataFlags; // one entry for each parameter + uint32_t* dataFlags; // an integer array with one entry for each parameter bool* bInterrupted; // Gets set to true by the Framework if something happens WF_FRAME fStartFrame; // Is set to default to start, then modified by the actual frame in the call WF_FRAME fEndFrame; // Is set to default to start, then modified by the actual frame in the call @@ -380,6 +379,7 @@ private: int32_t fEndConstant; // for end frame WF_PRECEEDIMG or WF_FOLLOWING std::string functionName; mcsv1sdk::mcsv1_UDAF* func; + int32_t fParamCount; // number of parameters; set by setParamCount(), returned by getParameterCount() public: // For use by the framework @@ -394,13 +394,14 @@ public: EXPORT void clearContextFlag(uint64_t flag); EXPORT uint64_t getContextFlags() const; EXPORT uint32_t getUserDataSize() const; - EXPORT std::vector& getDataFlags(); - EXPORT void setDataFlags(std::vector* flags); +
EXPORT uint32_t* getDataFlags(); + EXPORT void setDataFlags(uint32_t* flags); EXPORT void setInterrupted(bool interrupted); EXPORT void setInterrupted(bool* interrupted); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction(); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const; EXPORT boost::shared_ptr getUserDataSP(); + EXPORT void setParamCount(int32_t paramCount); }; // Since aggregate functions can operate on any data type, we use the following structure @@ -419,9 +420,10 @@ public: struct ColumnDatum { CalpontSystemCatalog::ColDataType dataType; // defined in calpontsystemcatalog.h - static_any::any columnData; + static_any::any columnData; // Not valid in init() uint32_t scale; // If dataType is a DECIMAL type uint32_t precision; // If dataType is a DECIMAL type + std::string alias; // Only filled in for init() ColumnDatum() : dataType(CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1) {}; }; @@ -466,7 +468,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes) = 0; + ColumnDatum* colTypes) = 0; /** * reset() @@ -501,8 +503,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn) = 0; + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn) = 0; /** * subEvaluate() @@ -579,8 +580,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() @@ -640,32 +640,32 @@ inline mcsv1Context::mcsv1Context() : fEndFrame(WF_CURRENT_ROW), fStartConstant(0), fEndConstant(0), - func(NULL) + func(NULL), + fParamCount(0) { } inline mcsv1Context::mcsv1Context(const mcsv1Context& rhs) : - fContextFlags(0), - fColWidth(0), - dataFlags(NULL), - bInterrupted(NULL), - func(NULL) + dataFlags(NULL) { copy(rhs); } inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs) { - fRunFlags = rhs.getRunFlags(); - fResultType = rhs.getResultType(); - fUserDataSize = rhs.getUserDataSize(); - fResultscale = rhs.getScale(); - fResultPrecision = rhs.getPrecision(); + fRunFlags = rhs.fRunFlags; + fContextFlags = rhs.fContextFlags; + fResultType = rhs.fResultType; + fUserDataSize = rhs.fUserDataSize; + fColWidth = rhs.fColWidth; + fResultscale = rhs.fResultscale; + fResultPrecision = rhs.fResultPrecision; rhs.getStartFrame(fStartFrame, fStartConstant); rhs.getEndFrame(fEndFrame, fEndConstant); - functionName = rhs.getName(); - bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference - func = rhs.func; + functionName = rhs.functionName; + bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference + func = rhs.func; + fParamCount = rhs.fParamCount; return *this; } @@ -675,11 +675,7 @@ inline mcsv1Context::~mcsv1Context() inline mcsv1Context& mcsv1Context::operator=(const mcsv1Context& rhs) { - fContextFlags = 0; - fColWidth = 0; dataFlags = NULL; - bInterrupted = NULL; - func = NULL; return copy(rhs); } @@ -753,16 +749,13 @@ inline bool mcsv1Context::isPM() inline size_t mcsv1Context::getParameterCount() const { - if (dataFlags) - return dataFlags->size(); - - return 0; + return fParamCount; } inline bool mcsv1Context::isParamNull(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & 
PARAM_IS_NULL; + return dataFlags[paramIdx] & PARAM_IS_NULL; return false; } @@ -770,7 +763,7 @@ inline bool mcsv1Context::isParamNull(int paramIdx) inline bool mcsv1Context::isParamConstant(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_CONSTANT; + return dataFlags[paramIdx] & PARAM_IS_CONSTANT; return false; } @@ -939,18 +932,22 @@ inline uint32_t mcsv1Context::getUserDataSize() const return fUserDataSize; } -inline std::vector& mcsv1Context::getDataFlags() +inline uint32_t* mcsv1Context::getDataFlags() { - return *dataFlags; + return dataFlags; } -inline void mcsv1Context::setDataFlags(std::vector* flags) +inline void mcsv1Context::setDataFlags(uint32_t* flags) { dataFlags = flags; } -inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, - std::vector& valsDropped) +inline void mcsv1Context::setParamCount(int32_t paramCount) +{ + fParamCount = paramCount; +} + +inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { return NOT_IMPLEMENTED; } diff --git a/utils/udfsdk/median.cpp b/utils/udfsdk/median.cpp index e32d721f1..9c7e72dc3 100644 --- a/utils/udfsdk/median.cpp +++ b/utils/udfsdk/median.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("median() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 
@@ mcsv1_UDAF::ReturnCode median::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; @@ -212,8 +211,7 @@ mcsv1_UDAF::ReturnCode median::evaluate(mcsv1Context* context, static_any::any& return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h index d64792461..48bd93c70 100644 --- a/utils/udfsdk/median.h +++ b/utils/udfsdk/median.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else @@ -134,7 +133,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +168,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +244,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/udfsdk/regr_avgx.cpp new file mode 100644 index 000000000..aec4f361f --- /dev/null +++ b/utils/udfsdk/regr_avgx.cpp @@ -0,0 +1,266 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include +#include +#include +#include "regr_avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model: a running sum of the x values and a count of the rows accumulated +struct regr_avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode regr_avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 2) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with other than 2 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(regr_avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[1].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode regr_avgx::reset(mcsv1Context* context) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_y = valsIn[0].columnData; + static_any::any& valIn_x = valsIn[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (context->isParamNull(0) || context->isParamNull(1)) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL.
Probably redundant + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point: divide by 10^scale of the x parameter.
+ uint32_t scale = valsIn[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct regr_avgx_data* outData = (struct regr_avgx_data*)context->getUserData()->data; + struct regr_avgx_data* inData = (struct regr_avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + + if (data->cnt == 0) + { + valOut = 0; // NOTE(review): assigns the int literal 0 although init() sets the result type to DOUBLE; confirm whether an empty set should yield NULL instead + } + else + { + valOut = data->sum / (double)data->cnt; + } + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_y = valsDropped[0].columnData; + static_any::any& valIn_x = valsDropped[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point: divide by 10^scale of the x parameter. + uint32_t scale = valsDropped[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/regr_avgx.h b/utils/udfsdk/regr_avgx.h new file mode 100644 index 000000000..27b8708f7 --- /dev/null +++ b/utils/udfsdk/regr_avgx.h @@ -0,0 +1,98 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* regr_avgx.h +***********************************************************************/ + +/** + * Columnstore interface for the regr_avgx function + * + * + * CREATE AGGREGATE FUNCTION regr_avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_regr_avgx +#define HEADER_regr_avgx + +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in context->setErrorMessage() is returned to MariaDB.
+ +// Return the regr_avgx value of the dataset: the average of the x (second) parameter over the rows accumulated + +class regr_avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + regr_avgx() : mcsv1_UDAF() {}; + virtual ~regr_avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_regr_avgx + diff --git a/utils/udfsdk/ssq.cpp b/utils/udfsdk/ssq.cpp index 4d9ef7e10..20fdc33db 100644 --- a/utils/udfsdk/ssq.cpp +++ b/utils/udfsdk/ssq.cpp @@ -34,9 +34,9 @@ struct ssq_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -44,13 +44,13 @@ mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("ssq() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -81,8 +81,7 @@ mcsv1_UDAF::ReturnCode ssq::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; struct ssq_data* data = (struct
ssq_data*)context->getUserData()->data; @@ -183,8 +182,7 @@ mcsv1_UDAF::ReturnCode ssq::evaluate(mcsv1Context* context, static_any::any& val return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h index 514c7a3f0..e27ecf1fa 100644 --- a/utils/udfsdk/ssq.h +++ b/utils/udfsdk/ssq.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else @@ -114,7 +113,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -147,8 +146,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -224,8 +222,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); protected: }; diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp index 981651c43..dc0277ccc 100644 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -490,6 +490,168 @@ extern "C" // return data->sumsq; return 0; } + +//======================================================================= + + /** + * regr_avgx connector stub: regr_avgx(y, x) accumulates the sum and count of x. NOTE(review): regr_avgx.h documents RETURNS REAL, yet the result function below is declared long long -- confirm. + */ + struct regr_avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_avgx_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_avgx() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // x is the second argument (index 1), so both its type and its value are read from index 1. TODO test for NULL in x and y + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args
__attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + return (data->cnt != 0) ? data->sumx / data->cnt : 0; // no rows added: return 0 rather than dividing by zero + } + +//======================================================================= + + /** + * avgx connector stub. Exactly the same functionality as the + * built in avg() function. Use to test the performance of the + * API + */ + struct avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct avgx_data* data; + if (args->arg_count != 1) + { + strcpy(message,"avgx() requires one argument"); + return 1; + } + + if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // avgx takes exactly one argument (enforced in avgx_init), so only index 0 is valid. TODO test for NULL in x + struct avgx_data* data = (struct avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[0], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + return (data->cnt != 0) ? data->sumx / data->cnt : 0; // no rows added: return 0 rather than dividing by zero + } } // vim:ts=4
sw=4: diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index 664b0e7de..fe1f3fd0e 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -204,8 +204,10 @@ Filters="*.c;*.C;*.cc;*.cpp;*.cp;*.cxx;*.c++;*.prg;*.pas;*.dpr;*.asm;*.s;*.bas;*.java;*.cs;*.sc;*.e;*.cob;*.html;*.rc;*.tcl;*.py;*.pl;*.d"> + + @@ -215,8 +217,10 @@ Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + + diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index f302c49cd..ee48360f1 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -52,6 +52,7 @@ using namespace joblist; namespace windowfunction { + template boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) { @@ -142,7 +143,7 @@ template void WF_udaf::resetData() { getContext().getFunction()->reset(&getContext()); - fSet.clear(); + fDistinctSet.clear(); WindowFunctionType::resetData(); } @@ -150,8 +151,8 @@ template void WF_udaf::parseParms(const std::vector& parms) { bRespectNulls = true; - // parms[1]: respect null | ignore null - ConstantColumn* cc = dynamic_cast(parms[1].get()); + // The last parms: respect null | ignore null + ConstantColumn* cc = dynamic_cast(parms[parms.size()-1].get()); idbassert(cc != NULL); bool isNull = false; // dummy, harded coded bRespectNulls = (cc->getIntVal(fRow, isNull) > 0); @@ -167,52 +168,71 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) } mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - uint64_t colOut = fFieldIndex[0]; - uint64_t colIn = fFieldIndex[1]; - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. 
+ getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); + + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } for (int64_t i = b; i < e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; + bool bHasNull = false; fRow.setData(getPointer(fRowData->at(i))); // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; + uint32_t flags[getContext().getParameterCount()]; - if (fRow.isNullValue(colIn) == true) + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) { - continue; + if (!bRespectNulls) + { + bHasNull = true; + break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - flag |= mcsv1sdk::PARAM_IS_NULL; + T valIn; + getValue(colIn, valIn, &datum.dataType); + + // Check for distinct, if turned on. + // Currently, distinct only works for param 1 + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. - // TODO: when we impliment distinct, we need to revist this. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) + if (bHasNull) { continue; } - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - rc = getContext().getFunction()->dropValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED) @@ -431,7 +451,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { mcsv1sdk::mcsv1_UDAF::ReturnCode rc; uint64_t colOut = fFieldIndex[0]; - + bool isNull = false; if ((fFrameUnit == WF__FRAME_ROWS) || (fPrev == -1) || (!fPeer->operator()(getPointer(fRowData->at(c)), getPointer(fRowData->at(fPrev))))) @@ -442,59 +462,268 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) else if (fPrev <= e && fPrev > c) e = c; - uint64_t colIn = fFieldIndex[1]; + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. + getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + ConstantColumn* cc = NULL; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + cc = static_cast(fConstantParms[i].get()); + if (cc) + { + datum.dataType = cc->resultType().colDataType; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + uint64_t colIn = fFieldIndex[i+1]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } + } if (b <= c && c <= e) getContext().setContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); else getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); - + bool bHasNull = false; for (int64_t i = b; i <= e; i++) { if (i % 1000 == 0 && fStep->cancelled()) 
break; fRow.setData(getPointer(fRowData->at(i))); - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - if (fRow.isNullValue(colIn) == true) + // NULL flags + uint32_t flags[getContext().getParameterCount()]; + bHasNull = false; + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + cc = static_cast(fConstantParms[k].get()); + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + + // Turn on Null flags or skip based on respect nulls + flags[k] = 0; + if ((!cc && fRow.isNullValue(colIn) == true) + || (cc && cc->type() == ConstantColumn::NULLDATA)) { - continue; + if (!bRespectNulls) + { + bHasNull = true; + break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - flag |= mcsv1sdk::PARAM_IS_NULL; + if (!bHasNull && !(flags[k] & mcsv1sdk::PARAM_IS_NULL)) + { + switch (datum.dataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + { + int64_t valIn; + if (cc) + { + valIn = cc->getIntVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + { + int64_t valIn; + if (cc) + { + valIn = cc->getDecimalVal(fRow, isNull).value; + } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + { + uint64_t valIn; + if (cc) + { + valIn = cc->getUintVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + double valIn; + if (cc) + { + valIn = cc->getDoubleVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + float valIn; + if (cc) + { + valIn = cc->getFloatVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + if (cc) + { + valIn = cc->getStrVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[i] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } + } + } } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. - if ((fDistinct) || (fSet.find(valIn) != fSet.end())) + // Skip if any value is NULL and respect nulls is off. 
+ if (bHasNull) { continue; } - - if (fDistinct) - fSet.insert(valIn); - - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - + getContext().setDataFlags(flags); + rc = getContext().getFunction()->nextValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index babb32565..fc3f9006d 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -21,18 +21,38 @@ #ifndef UTILS_WF_UDAF_H #define UTILS_WF_UDAF_H -#include +#ifndef _MSC_VER +#include +#else +#include +#endif #include "windowfunctiontype.h" #include "mcsv1_udaf.h" namespace windowfunction { +// Hash classes for the distinct hashmap +class DistinctHasher +{ +public: + inline size_t operator()(const static_any::any& a) const + { + return a.getHash(); + } +}; + +class DistinctEqual +{ +public: + inline bool operator()(const static_any::any& lhs, static_any::any& rhs) const + { + return lhs == rhs; + } +}; // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF -// The template parameter is currently only used to support DISTINCT, as -// as that is done via a set template class WF_udaf : public WindowFunctionType { @@ -72,7 +92,8 @@ protected: bool fDistinct; bool bRespectNulls; // respect null | ignore null bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue. 
- std::set fSet; // To hold distinct values + // To hold distinct values + std::tr1::unordered_set fDistinctSet; static_any::any fValOut; // The return value public: diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index 950045899..f5598a7e5 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -39,7 +39,6 @@ using namespace logging; using namespace ordering; #include "calpontsystemcatalog.h" -#include "constantcolumn.h" #include "dataconvert.h" // int64_t IDB_pow[19] using namespace execplan; @@ -228,6 +227,9 @@ WindowFunctionType::makeWindowFunction(const string& name, int ct, WindowFunctio break; } + // Copy the only the constant parameter pointers + af->constParms(wc->functionParms()); + return af; } @@ -492,10 +494,10 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) static uint64_t dateNull = joblist::DATENULL; static uint64_t datetimeNull = joblist::DATETIMENULL; static uint64_t timeNull = joblist::TIMENULL; - static uint64_t char1Null = joblist::CHAR1NULL; - static uint64_t char2Null = joblist::CHAR2NULL; - static uint64_t char4Null = joblist::CHAR4NULL; - static uint64_t char8Null = joblist::CHAR8NULL; +// static uint64_t char1Null = joblist::CHAR1NULL; +// static uint64_t char2Null = joblist::CHAR2NULL; +// static uint64_t char4Null = joblist::CHAR4NULL; +// static uint64_t char8Null = joblist::CHAR8NULL; static string stringNull(""); void* v = NULL; @@ -634,6 +636,26 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) return v; } +void WindowFunctionType::constParms(const std::vector& functionParms) +{ + // fConstantParms will end up with a copy of functionParms, but only + // the constant types will be copied. Other types will take up space but + // be NULL. This allows us to acces the constants without the overhead + // of dynamic_cast for every row. 
+ for (size_t i = 0; i < functionParms.size(); ++i) + { + ConstantColumn* cc = dynamic_cast(functionParms[i].get()); + if (cc) + { + fConstantParms.push_back(functionParms[i]); + } + else + { + fConstantParms.push_back(SRCP(cc)); + } + } +} + } //namespace // vim:ts=4 sw=4: diff --git a/utils/windowfunction/windowfunctiontype.h b/utils/windowfunction/windowfunctiontype.h index 50732d3b5..efa1c548a 100644 --- a/utils/windowfunction/windowfunctiontype.h +++ b/utils/windowfunction/windowfunctiontype.h @@ -31,7 +31,7 @@ #include "returnedcolumn.h" #include "rowgroup.h" #include "windowframe.h" - +#include "constantcolumn.h" namespace ordering { @@ -198,6 +198,8 @@ public: fStep = step; } + void constParms(const std::vector& functionParms); + static boost::shared_ptr makeWindowFunction(const std::string&, int ct, WindowFunctionColumn* wc); protected: @@ -244,6 +246,9 @@ protected: // output and input field indices: [0] - output std::vector fFieldIndex; + // constant function parameters -- needed for udaf with constant + std::vector fConstantParms; + // row meta data rowgroup::RowGroup fRowGroup; rowgroup::Row fRow; diff --git a/utils/winport/win_setup_mysql_part4.sql b/utils/winport/win_setup_mysql_part4.sql index 3b75fbe98..d884214ec 100644 --- a/utils/winport/win_setup_mysql_part4.sql +++ b/utils/winport/win_setup_mysql_part4.sql @@ -18,4 +18,5 @@ CREATE FUNCTION idbextentmin RETURNS STRING soname 'libcalmysql.dll'; CREATE FUNCTION idbextentmax RETURNS STRING soname 'libcalmysql.dll'; CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.dll'; CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.dll'; +CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libcalmysql.dll'; diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 7cd275021..71d0e1fbd 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1280,7 +1280,7 @@ int WriteEngineWrapper::insertColumnRecs(const 
TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -2025,10 +2025,10 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 0; k < colStructList.size(); k++) + for (size_t k = 0; k < colStructList.size(); k++) { // Skip the selected column - if (k == colId) + if (k == (size_t)colId) continue; Column expandCol; @@ -2583,7 +2583,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -3278,7 +3278,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)];