From 6fa7dded6fb9c9ebfc50a244c664d9246c0b8578 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:50:10 -0500 Subject: [PATCH] MCOL-1201 manual rebase with develop. Obsoletes branch MCOL-1201 --- dbcon/execplan/aggregatecolumn.cpp | 96 +-- dbcon/execplan/aggregatecolumn.h | 44 +- dbcon/joblist/expressionstep.cpp | 12 +- dbcon/joblist/expressionstep.h | 1 + dbcon/joblist/groupconcat.cpp | 2 +- dbcon/joblist/joblistfactory.cpp | 531 ++++++++---- dbcon/joblist/tupleaggregatestep.cpp | 280 +++++-- dbcon/mysql/ha_calpont_execplan.cpp | 858 +++++++++++--------- dbcon/mysql/ha_calpont_impl.cpp | 9 +- dbcon/mysql/ha_window_function.cpp | 37 +- utils/common/any.hpp | 270 +++--- utils/rowgroup/rowaggregation.cpp | 605 +++++++++----- utils/rowgroup/rowaggregation.h | 29 +- utils/udfsdk/CMakeLists.txt | 2 +- utils/udfsdk/allnull.cpp | 7 +- utils/udfsdk/allnull.h | 4 +- utils/udfsdk/avg_mode.cpp | 14 +- utils/udfsdk/avg_mode.h | 14 +- utils/udfsdk/mcsv1_udaf.cpp | 13 +- utils/udfsdk/mcsv1_udaf.h | 88 +- utils/udfsdk/median.cpp | 14 +- utils/udfsdk/median.h | 8 +- utils/udfsdk/ssq.cpp | 14 +- utils/udfsdk/ssq.h | 8 +- utils/udfsdk/udfmysql.cpp | 162 ++++ utils/udfsdk/udfsdk.vpj | 4 + utils/windowfunction/wf_udaf.cpp | 280 +++++-- utils/windowfunction/wf_udaf.h | 27 +- utils/windowfunction/windowfunctiontype.cpp | 8 +- writeengine/wrapper/writeengine.cpp | 10 +- 30 files changed, 2255 insertions(+), 1196 deletions(-) diff --git a/dbcon/execplan/aggregatecolumn.cpp b/dbcon/execplan/aggregatecolumn.cpp index 18cba2607..5bce12d79 100644 --- a/dbcon/execplan/aggregatecolumn.cpp +++ b/dbcon/execplan/aggregatecolumn.cpp @@ -98,36 +98,6 @@ AggregateColumn::AggregateColumn(const uint32_t sessionID): { } -AggregateColumn::AggregateColumn(const AggOp aggOp, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - 
-AggregateColumn::AggregateColumn(const AggOp aggOp, const string& content, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + content + ")") -{ - // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); -} - -// deprecated constructor. use function name as string -AggregateColumn::AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fFunctionName(functionName), - fAggOp(NOOP), - fAsc(false), - fData(functionName + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - // deprecated constructor. use function name as string AggregateColumn::AggregateColumn(const string& functionName, const string& content, const uint32_t sessionID): ReturnedColumn(sessionID), @@ -137,20 +107,21 @@ AggregateColumn::AggregateColumn(const string& functionName, const string& conte fData(functionName + "(" + content + ")") { // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); + SRCP srcp(new ArithmeticColumn(content)); + fAggParms.push_back(srcp); } AggregateColumn::AggregateColumn( const AggregateColumn& rhs, const uint32_t sessionID ): ReturnedColumn(rhs, sessionID), fFunctionName (rhs.fFunctionName), fAggOp(rhs.fAggOp), - fFunctionParms(rhs.fFunctionParms), fTableAlias(rhs.tableAlias()), fAsc(rhs.asc()), fData(rhs.data()), fConstCol(rhs.fConstCol) { fAlias = rhs.alias(); + fAggParms = rhs.fAggParms; } /** @@ -166,10 +137,14 @@ const string AggregateColumn::toString() const if (fAlias.length() > 0) output << "/Alias: " << fAlias << endl; - if (fFunctionParms == 0) - output << "No arguments" << endl; + if (fAggParms.size() == 0) + output << "No arguments"; else - output << *fFunctionParms << endl; + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + output << *(fAggParms[i]) << " "; + } + output << endl; if (fConstCol) output << *fConstCol; @@ -191,10 +166,11 @@ void 
AggregateColumn::serialize(messageqcpp::ByteStream& b) const b << fFunctionName; b << static_cast(fAggOp); - if (fFunctionParms == 0) - b << (uint8_t) ObjectReader::NULL_CLASS; - else - fFunctionParms->serialize(b); + b << static_cast(fAggParms.size()); + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + fAggParms[i]->serialize(b); + } b << static_cast(fGroupByColList.size()); @@ -219,20 +195,26 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const void AggregateColumn::unserialize(messageqcpp::ByteStream& b) { - ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); - fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); - fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); - ReturnedColumn::unserialize(b); - b >> fFunctionName; - b >> fAggOp; - //delete fFunctionParms; - fFunctionParms.reset( - dynamic_cast(ObjectReader::createTreeNode(b))); - messageqcpp::ByteStream::quadbyte size; messageqcpp::ByteStream::quadbyte i; ReturnedColumn* rc; + ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); + fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); + fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); + fAggParms.erase(fAggParms.begin(), fAggParms.end()); + ReturnedColumn::unserialize(b); + b >> fFunctionName; + b >> fAggOp; + + b >> size; + for (i = 0; i < size; i++) + { + rc = dynamic_cast(ObjectReader::createTreeNode(b)); + SRCP srcp(rc); + fAggParms.push_back(srcp); + } + b >> size; for (i = 0; i < size; i++) @@ -261,6 +243,7 @@ void AggregateColumn::unserialize(messageqcpp::ByteStream& b) bool AggregateColumn::operator==(const AggregateColumn& t) const { const ReturnedColumn* rc1, *rc2; + AggParms::const_iterator it, it2; rc1 = static_cast(this); rc2 = static_cast(&t); @@ -277,16 +260,18 @@ bool AggregateColumn::operator==(const AggregateColumn& t) const if (fAggOp != t.fAggOp) return false; - if (fFunctionParms.get() != NULL && t.fFunctionParms.get() != NULL) 
+ if (aggParms().size() != t.aggParms().size()) { - if (*fFunctionParms.get() != t.fFunctionParms.get()) + return false; + } + for (it = fAggParms.begin(), it2 = t.fAggParms.begin(); + it != fAggParms.end(); + ++it, ++it2) + { + if (**it != **it2) return false; } - else if (fFunctionParms.get() != NULL || t.fFunctionParms.get() != NULL) - return false; - //if (fAlias != t.fAlias) - // return false; if (fTableAlias != t.fTableAlias) return false; @@ -645,3 +630,4 @@ AggregateColumn::AggOp AggregateColumn::agname2num(const string& agname) } } // namespace execplan + diff --git a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h index d1db7e5a4..b0884f179 100644 --- a/dbcon/execplan/aggregatecolumn.h +++ b/dbcon/execplan/aggregatecolumn.h @@ -40,6 +40,8 @@ class ByteStream; namespace execplan { +typedef std::vector AggParms; + /** * @brief A class to represent a aggregate return column * @@ -74,7 +76,8 @@ public: BIT_OR, BIT_XOR, GROUP_CONCAT, - UDAF + UDAF, + MULTI_PARM }; /** @@ -94,21 +97,6 @@ public: */ AggregateColumn(const uint32_t sessionID); - /** - * ctor - */ - AggregateColumn(const AggOp aggop, ReturnedColumn* parm, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const AggOp aggop, const std::string& content, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID = 0); - /** * ctor */ @@ -155,24 +143,27 @@ public: fAggOp = aggOp; } + /** get function parms - * - * set the function parms from this object */ - virtual const SRCP functionParms() const + virtual AggParms& aggParms() { - return fFunctionParms; + return fAggParms; + } + + virtual const AggParms& aggParms() const + { + return fAggParms; } /** set function parms - * - * set the function parms for this object */ - virtual void functionParms(const SRCP& functionParms) + virtual void aggParms(const AggParms& parms) { - fFunctionParms = functionParms; + fAggParms = 
parms; } + /** return a copy of this pointer * * deep copy of this pointer and return the copy @@ -325,9 +316,10 @@ protected: uint8_t fAggOp; /** - * A ReturnedColumn objects that are the arguments to this function + * ReturnedColumn objects that are the arguments to this + * function */ - SRCP fFunctionParms; + AggParms fAggParms; /** table alias * A string to represent table alias name which contains this column diff --git a/dbcon/joblist/expressionstep.cpp b/dbcon/joblist/expressionstep.cpp index 0e064c359..4a8a14ff3 100644 --- a/dbcon/joblist/expressionstep.cpp +++ b/dbcon/joblist/expressionstep.cpp @@ -56,6 +56,17 @@ using namespace rowgroup; namespace joblist { +ExpressionStep::ExpressionStep() : + fExpressionFilter(NULL), + fExpressionId(-1), + fVarBinOK(false), + fSelectFilter(false), + fAssociatedJoinId(0), + fDoJoin(false), + fVirtual(false) +{ +} + ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : JobStep(jobInfo), fExpressionFilter(NULL), @@ -68,7 +79,6 @@ ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : { } - ExpressionStep::ExpressionStep(const ExpressionStep& rhs) : JobStep(rhs), fExpression(rhs.expression()), diff --git a/dbcon/joblist/expressionstep.h b/dbcon/joblist/expressionstep.h index 4a069440f..63423fc7d 100644 --- a/dbcon/joblist/expressionstep.h +++ b/dbcon/joblist/expressionstep.h @@ -50,6 +50,7 @@ class ExpressionStep : public JobStep { public: // constructors + ExpressionStep(); ExpressionStep(const JobInfo&); // destructor constructors virtual ~ExpressionStep(); diff --git a/dbcon/joblist/groupconcat.cpp b/dbcon/joblist/groupconcat.cpp index 234fc0a8e..afc91a2ec 100644 --- a/dbcon/joblist/groupconcat.cpp +++ b/dbcon/joblist/groupconcat.cpp @@ -78,7 +78,7 @@ void GroupConcatInfo::prepGroupConcat(JobInfo& jobInfo) while (i != jobInfo.groupConcatCols.end()) { GroupConcatColumn* gcc = dynamic_cast(i->get()); - const RowColumn* rcp = dynamic_cast(gcc->functionParms().get()); + const RowColumn* rcp = 
dynamic_cast(gcc->aggParms()[0].get()); SP_GroupConcat groupConcat(new GroupConcat); groupConcat->fSeparator = gcc->separator(); diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index a48ecd13a..4cf7bccc5 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -18,7 +18,6 @@ // $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $ - #include #include #include @@ -870,7 +869,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo if (gcc != NULL) { - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rcp = dynamic_cast(srcp.get()); const vector& cols = rcp->columnVec(); @@ -891,21 +890,55 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } +#if 0 + // MCOL-1201 Add support for multi-parameter UDAnF + UDAFColumn* udafc = dynamic_cast(retCols[i].get()); + if (udafc != NULL) + { + srcp = udafc->aggParms()[0]; + const RowColumn* rcp = dynamic_cast(srcp.get()); + const vector& cols = rcp->columnVec(); + for (vector::const_iterator j = cols.begin(); j != cols.end(); j++) + { + srcp = *j; + if (dynamic_cast(srcp.get()) == NULL) + retCols.push_back(srcp); + + // Do we need this? + const ArithmeticColumn* ac = dynamic_cast(srcp.get()); + const FunctionColumn* fc = dynamic_cast(srcp.get()); + if (ac != NULL || fc != NULL) + { + // bug 3728, make a dummy expression step for each expression. + scoped_ptr es(new ExpressionStep(jobInfo)); + es->expression(srcp, jobInfo); + } + } + continue; + } +#endif srcp = retCols[i]; const AggregateColumn* ag = dynamic_cast(retCols[i].get()); - - if (ag != NULL) - srcp = ag->functionParms(); - - const ArithmeticColumn* ac = dynamic_cast(srcp.get()); - const FunctionColumn* fc = dynamic_cast(srcp.get()); - - if (ac != NULL || fc != NULL) + // bug 3728 Make a dummy expression for srcp if it is an + // expression. This is needed to fill in some stuff. 
+ // Note that es.expression does nothing if the item is not an expression. + if (ag == NULL) { - // bug 3728, make a dummy expression step for each expression. - scoped_ptr es(new ExpressionStep(jobInfo)); - es->expression(srcp, jobInfo); + // Not an aggregate. Make a dummy expression for the item + ExpressionStep es; + es.expression(srcp, jobInfo); + } + else + { + // MCOL-1201 multi-argument aggregate. make a dummy expression + // step for each argument that is an expression. + for (uint32_t i = 0; i < ag->aggParms().size(); ++i) + { + srcp = ag->aggParms()[i]; + ExpressionStep es; + es.expression(srcp, jobInfo); + } } } @@ -915,17 +948,18 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo { srcp = retCols[i]; const SimpleColumn* sc = dynamic_cast(srcp.get()); + AggregateColumn* aggc = dynamic_cast(srcp.get()); bool doDistinct = (csep->distinct() && csep->groupByCols().empty()); uint32_t tupleKey = -1; string alias; string view; - // returned column could be groupby column, a simplecoulumn not a agregatecolumn + // returned column could be groupby column, a simplecoulumn not an aggregatecolumn int op = 0; CalpontSystemCatalog::OID dictOid = 0; CalpontSystemCatalog::ColType ct, aggCt; - if (sc == NULL) + if (aggc) { GroupConcatColumn* gcc = dynamic_cast(retCols[i].get()); @@ -939,7 +973,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo tupleKey = ti.key; jobInfo.returnedColVec.push_back(make_pair(tupleKey, gcc->aggOp())); // not a tokenOnly column. 
Mark all the columns involved - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rowCol = dynamic_cast(srcp.get()); if (rowCol) @@ -963,186 +997,353 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } - - AggregateColumn* ac = dynamic_cast(retCols[i].get()); - - if (ac != NULL) + else { - srcp = ac->functionParms(); - sc = dynamic_cast(srcp.get()); + // Aggregate column not group concat + AggParms& aggParms = aggc->aggParms(); - if (ac->constCol().get() != NULL) + for (uint32_t parm = 0; parm < aggParms.size(); ++parm) { - // replace the aggregate on constant with a count(*) - SRCP clone; - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) + if (aggc->constCol().get() != NULL) { - clone.reset(new UDAFColumn(*udafc, ac->sessionID())); + // replace the aggregate on constant with a count(*) + SRCP clone; + UDAFColumn* udafc = dynamic_cast(aggc); + + if (udafc) + { + clone.reset(new UDAFColumn(*udafc, aggc->sessionID())); + } + else + { + clone.reset(new AggregateColumn(*aggc, aggc->sessionID())); + } + + jobInfo.constAggregate.insert(make_pair(i, clone)); + aggc->aggOp(AggregateColumn::COUNT_ASTERISK); + aggc->distinct(false); + } + + srcp = aggParms[parm]; + sc = dynamic_cast(srcp.get()); + if (parm == 0) + { + op = aggc->aggOp(); } else { - clone.reset(new AggregateColumn(*ac, ac->sessionID())); + op = AggregateColumn::MULTI_PARM; + } + doDistinct = aggc->distinct(); + if (aggParms.size() == 1) + { + // Set the col type based on the single parm. + // Changing col type based on a parm if multiple parms + // doesn't really make sense. + updateAggregateColType(aggc, srcp, op, jobInfo); + } + aggCt = aggc->resultType(); + + // As of bug3695, make sure varbinary is not used in aggregation. 
+ // TODO: allow for UDAF + if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) + throw runtime_error ("VARBINARY in aggregate function is not supported."); + + // Project the parm columns or expressions + if (sc != NULL) + { + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. + if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. 
+ else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } + } + else + { + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - jobInfo.constAggregate.insert(make_pair(i, clone)); - ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ac->distinct(false); - } + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - op = ac->aggOp(); - doDistinct = ac->distinct(); - updateAggregateColType(ac, srcp, op, jobInfo); - aggCt = ac->resultType(); + if (keyIt == projectKeys.end()) + { + 
RetColsVector::iterator it = pcv.end(); - // As of bug3695, make sure varbinary is not used in aggregation. - if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) - throw runtime_error ("VARBINARY in aggregate function is not supported."); - } - } + if (doDistinct) + it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // simple column selected or aggregated - if (sc != NULL) - { - // one column only need project once - CalpontSystemCatalog::OID retOid = sc->oid(); - CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); - alias = extractTableAlias(sc); - view = sc->viewName(); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); - if (!sc->schemaName().empty()) - { - ct = sc->colType(); + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } -//XXX use this before connector sets colType in sc correctly. 
- if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) - ct = jobInfo.csc->colType(sc->oid()); + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; -//X - dictOid = isDictCol(ct); - } - else - { - retOid = (tblOid + 1) + sc->colPosition(); - ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; - } + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); - TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); - tupleKey = ti.key; + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; - // this is a string column - if (dictOid > 0) - { - map::iterator findit = jobInfo.tokenOnly.find(tupleKey); - - // if the column has never seen, and the op is count: possible need count only. - if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) - { - if (findit == jobInfo.tokenOnly.end()) - jobInfo.tokenOnly[tupleKey] = true; - } - // if aggregate other than count, token is not enough. 
- else if (op != 0 || doDistinct) - { - jobInfo.tokenOnly[tupleKey] = false; - } - - findit = jobInfo.tokenOnly.find(tupleKey); - - if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) - { - dictMap[tupleKey] = dictOid; - jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; - ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); - jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } } else { - const ArithmeticColumn* ac = NULL; - const FunctionColumn* fc = NULL; - const WindowFunctionColumn* wc = NULL; - bool hasAggCols = false; - - if ((ac = dynamic_cast(srcp.get())) != NULL) + // Not an Aggregate + // simple column selected + if (sc != NULL) { - if (ac->aggColumnList().size() > 0) - hasAggCols = true; + // one column only need project once + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. 
+ if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. + else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } } - else if ((fc = dynamic_cast(srcp.get())) != NULL) - { - if (fc->aggColumnList().size() > 0) - hasAggCols = true; - } - else if (dynamic_cast(srcp.get()) != NULL) - { - std::ostringstream errmsg; - errmsg << "Invalid aggregate function nesting."; - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - else if ((wc = dynamic_cast(srcp.get())) == NULL) - { - std::ostringstream errmsg; - errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - - uint64_t eid = srcp.get()->expressionId(); - ct = srcp.get()->resultType(); - TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); - tupleKey = ti.key; - - if (hasAggCols) - jobInfo.expressionVec.push_back(tupleKey); - } - - // add to project list - vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - - if (keyIt == projectKeys.end()) - { - RetColsVector::iterator it = pcv.end(); - - if (doDistinct) - it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); else - it = pcv.insert(pcv.end(), srcp); - - projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); - } - else if (doDistinct) // @bug4250, move forward distinct column if necessary. 
- { - uint32_t pos = distance(projectKeys.begin(), keyIt); - - if (pos >= lastGroupByPos) { - pcv[pos] = pcv[lastGroupByPos]; - pcv[lastGroupByPos] = srcp; - projectKeys[pos] = projectKeys[lastGroupByPos]; - projectKeys[lastGroupByPos] = tupleKey; - lastGroupByPos++; + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - } - if (doDistinct && dictOid > 0) - tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - // remember the columns to be returned - jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + if (keyIt == projectKeys.end()) + { + RetColsVector::iterator it = pcv.end(); - if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) - jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + if (doDistinct) + it = pcv.insert(pcv.begin() + 
lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // bug 1499 distinct processing, save unique distinct columns - if (doDistinct && - (jobInfo.distinctColVec.end() == - find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) - { - jobInfo.distinctColVec.push_back(tupleKey); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); + + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } + + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 9e23ac17b..ff490da5b 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -164,6 +164,9 @@ inline RowAggFunctionType functionIdMap(int planFuncId) case AggregateColumn::UDAF: return ROWAGG_UDAF; + case AggregateColumn::MULTI_PARM: + return ROWAGG_MULTI_PARM; + default: return ROWAGG_FUNCT_UNDEFINE; } @@ -1302,7 +1305,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no/not enough 
UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -1468,7 +1471,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); @@ -1483,6 +1486,17 @@ void TupleAggregateStep::prep1PhaseAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(key); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(width[colProj]); + } + break; + default: { ostringstream emsg; @@ -1560,7 +1574,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(3)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec[i]->fAuxColumnIndex = lastCol++; @@ -1675,7 +1689,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation map projColPosMap; @@ -1848,7 +1862,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); 
} } else @@ -2043,7 +2057,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -2065,6 +2079,18 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(aggKey); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(widthProj[colProj]); + ++colAgg; + } + break; + default: { ostringstream emsg; @@ -2111,7 +2137,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( groupByNoDist.push_back(groupby); aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - + + projColsUDAFIndex = 0; // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2121,6 +2148,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colAgg = -1; + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -2432,11 +2467,37 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + // update the aggregate function vector + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = 
jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + } if (aggOp == ROWAGG_COUNT_NO_OP) funct->fAuxColumnIndex = colAgg; @@ -2549,7 +2610,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep1PhaseDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec2[i]->fAuxColumnIndex = lastCol++; @@ -2893,7 +2954,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3036,12 +3097,11 @@ void TupleAggregateStep::prep2PhasesAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); break; } - } if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -3240,7 +3300,7 @@ void 
TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } oidsAggPm.push_back(oidsProj[colProj]); @@ -3261,6 +3321,18 @@ void TupleAggregateStep::prep2PhasesAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -3278,11 +3350,16 @@ void TupleAggregateStep::prep2PhasesAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap; AGG_MAP aggDupFuncMap; + projColsUDAFIndex = 0; // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3299,7 +3376,14 @@ void TupleAggregateStep::prep2PhasesAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colPm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } // Is this a UDAF? use the function as part of the key. 
+ mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; if (aggOp == ROWAGG_UDAF) @@ -3452,20 +3536,36 @@ void TupleAggregateStep::prep2PhasesAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3517,7 +3617,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3545,7 +3645,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -3691,6 +3791,7 
@@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector groupByPm, groupByUm, groupByNoDist; vector functionVecPm, functionNoDistVec, functionVecUm; + list multiParmIndexes; uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; @@ -3702,7 +3803,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3856,7 +3957,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -4050,7 +4151,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -4072,6 +4173,19 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + multiParmIndexes.push_back(colAggPm); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -4093,12 +4207,23 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( groupByUm.push_back(groupby); } + // Keep a count of the parms 
after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) + { SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; // UDAF support + if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) + { + // Multi-Parm is not used on the UM + ++multiParms; + continue; + } if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); @@ -4106,7 +4231,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } else @@ -4116,18 +4241,25 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fStatsFunction, funcPm->fOutputColumnIndex, funcPm->fOutputColumnIndex, - funcPm->fAuxColumnIndex)); + funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } } - posAggUm = posAggPm; - oidsAggUm = oidsAggPm; - keysAggUm = keysAggPm; - scaleAggUm = scaleAggPm; - precisionAggUm = precisionAggPm; - widthAggUm = widthAggPm; - typeAggUm = typeAggPm; + // Copy over the PM arrays to the UM. Skip any that are a multi-parm entry. 
+ for (uint32_t idx = 0; idx < oidsAggPm.size(); ++idx) + { + if (find (multiParmIndexes.begin(), multiParmIndexes.end(), idx ) != multiParmIndexes.end()) + { + continue; + } + oidsAggUm.push_back(oidsAggPm[idx]); + keysAggUm.push_back(keysAggPm[idx]); + scaleAggUm.push_back(scaleAggPm[idx]); + precisionAggUm.push_back(precisionAggPm[idx]); + widthAggUm.push_back(widthAggPm[idx]); + typeAggUm.push_back(typeAggPm[idx]); + } } @@ -4137,6 +4269,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4159,6 +4295,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colUm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } + + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -4285,7 +4436,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second; + colUm = it->second - multiParms; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); @@ -4309,7 +4460,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - 
colUm = it->second; + colUm = it->second - multiParms; if (aggOp == ROWAGG_SUM) { @@ -4412,21 +4563,36 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - pUDAFFunc = udafc->getContext().getFunction(); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4480,7 +4646,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4540,7 +4706,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called 
but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -4687,6 +4853,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -4694,6 +4865,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } if (returnedColVec[k].first != distinctColKey) continue; @@ -4715,7 +4891,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -4732,9 +4908,15 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } // search non-distinct functions in functionVec vector::iterator it = functionVecUm.begin(); @@ -4752,7 +4934,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK || @@ -4773,7 +4955,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( 
f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 271508f42..9150d5393 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4038,6 +4038,10 @@ ParseTree* buildParseTree(Item_func* item, gp_walk_info& gwi, bool& nonSupport) ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { + // MCOL-1201 For UDAnF multiple parameters + vector selCols; + vector orderCols; + if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -4054,6 +4058,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // N.B. argument_count() is the # of formal parms to the agg fcn. InifniDB only supports 1 argument // TODO: Support more than one parm +#if 0 if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC && isp->sum_func() != Item_sum::UDF_SUM_FUNC) { @@ -4061,7 +4066,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG); return NULL; } - +#endif AggregateColumn* ac = NULL; if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) @@ -4084,444 +4089,509 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { gwi.fatalParseError = true; gwi.parseErrorText = "Non supported aggregate type on the select clause"; + if (ac) + delete ac; return NULL; } - // special parsing for group_concat - if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + try { - Item_func_group_concat* gc = (Item_func_group_concat*)isp; + + // special parsing for group_concat + if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + Item_func_group_concat* gc = (Item_func_group_concat*)isp; vector orderCols; - RowColumn* rowCol = new RowColumn(); + 
RowColumn* rowCol = new RowColumn(); vector selCols; - uint32_t select_ctn = gc->count_field(); - ReturnedColumn* rc = NULL; + uint32_t select_ctn = gc->count_field(); + ReturnedColumn* rc = NULL; - for (uint32_t i = 0; i < select_ctn; i++) - { - rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); - - if (!rc || gwi.fatalParseError) - return NULL; - - selCols.push_back(SRCP(rc)); - } - - ORDER** order_item, **end; - - for (order_item = gc->get_order(), - end = order_item + gc->order_field(); order_item < end; - order_item++) - { - Item* ord_col = *(*order_item)->item; - - if (ord_col->type() == Item::INT_ITEM) + for (uint32_t i = 0; i < select_ctn; i++) { - Item_int* id = (Item_int*)ord_col; - - if (id->val_int() > (int)selCols.size()) - { - gwi.fatalParseError = true; - return NULL; - } - - rc = selCols[id->val_int() - 1]->clone(); - rc->orderPos(id->val_int() - 1); - } - else - { - rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); + rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); if (!rc || gwi.fatalParseError) { + if (ac) + delete ac; return NULL; } + + selCols.push_back(SRCP(rc)); } - // 10.2 TODO: direction is now a tri-state flag - rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? 
true : false); - orderCols.push_back(SRCP(rc)); - } + ORDER** order_item, **end; - rowCol->columnVec(selCols); - (dynamic_cast(ac))->orderCols(orderCols); - parm.reset(rowCol); - - if (gc->str_separator()) - { - string separator; - separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); - (dynamic_cast(ac))->separator(separator); - } - } - else - { - for (uint32_t i = 0; i < isp->argument_count(); i++) - { - Item* sfitemp = sfitempp[i]; - Item::Type sfitype = sfitemp->type(); - - switch (sfitype) + for (order_item = gc->get_order(), + end = order_item + gc->order_field(); order_item < end; + order_item++) { - case Item::FIELD_ITEM: - { - Item_field* ifp = reinterpret_cast(sfitemp); - SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + Item* ord_col = *(*order_item)->item; - if (!sc) + if (ord_col->type() == Item::INT_ITEM) + { + Item_int* id = (Item_int*)ord_col; + + if (id->val_int() > (int)selCols.size()) { gwi.fatalParseError = true; - break; + if (ac) + delete ac; + return NULL; } - parm.reset(sc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); - TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); - gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); - break; + rc = selCols[id->val_int() - 1]->clone(); + rc->orderPos(id->val_int() - 1); } - - case Item::INT_ITEM: - case Item::STRING_ITEM: - case Item::REAL_ITEM: - case Item::DECIMAL_ITEM: + else { - // treat as count(*) - if (ac->aggOp() == AggregateColumn::COUNT) - ac->aggOp(AggregateColumn::COUNT_ASTERISK); + rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::NULL_ITEM: - { - //ac->aggOp(AggregateColumn::COUNT); - parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); - //ac->functionParms(parm); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::FUNC_ITEM: - { - Item_func* ifp = (Item_func*)sfitemp; - ReturnedColumn* rc = 0; - - // check count(1+1) case - vector tmpVec; - uint16_t parseInfo = 0; - parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); - - if (parseInfo & SUB_BIT) + if (!rc || gwi.fatalParseError) { - gwi.fatalParseError = true; - break; - } - else if (!gwi.fatalParseError && - !(parseInfo & AGG_BIT) && - !(parseInfo & AF_BIT) && - tmpVec.size() == 0) - { - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - FunctionColumn* fc = dynamic_cast(rc); - - if ((fc && fc->functionParms().empty()) || !fc) - { - //ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (dynamic_cast(rc)) - { - //@bug5229. handle constant function on aggregate argument - ac->constCol(SRCP(rc)); - break; - } - } - } - - // MySQL carelessly allows correlated aggregate function on the WHERE clause. - // Here is the work around to deal with that inconsistence. 
- // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; - ClauseType clauseType = gwi.clauseType; - - if (gwi.clauseType == WHERE) - gwi.clauseType = HAVING; - - // @bug 3603. for cases like max(rand()). try to build function first. - if (!rc) - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - - parm.reset(rc); - gwi.clauseType = clauseType; - - if (gwi.fatalParseError) - break; - - //ac->functionParms(parm); - break; - } - - case Item::REF_ITEM: - { - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (rc) - { - parm.reset(rc); - //ac->functionParms(parm); - break; + if (ac) + delete ac; + return NULL; } } - default: - { - gwi.fatalParseError = true; - //gwi.parseErrorText = "Non-supported Item in Aggregate function"; - } + // 10.2 TODO: direction is now a tri-state flag + rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); + orderCols.push_back(SRCP(rc)); } - if (gwi.fatalParseError) + rowCol->columnVec(selCols); + (dynamic_cast(ac))->orderCols(orderCols); + parm.reset(rowCol); + + if (gc->str_separator()) { - if (gwi.parseErrorText.empty()) - { - Message::Args args; - - if (item->name) - args.add(item->name); - else - args.add(""); - - gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); - } - - return NULL; + string separator; + separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); + (dynamic_cast(ac))->separator(separator); } } - } - - if (parm) - { - ac->functionParms(parm); - - if (isp->sum_func() == Item_sum::AVG_FUNC || - isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: 
- case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::DECIMAL; - ct.colWidth = 8; - ct.scale += 4; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; -#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::COUNT_FUNC || - isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = parm->resultType().scale; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_FUNC || - isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - ct.colDataType = CalpontSystemCatalog::BIGINT; - - // no break, let fall through - - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - ct.colWidth = 8; - break; - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::UBIGINT; - ct.colWidth = 8; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; 
-#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::STD_FUNC || - isp->sum_func() == Item_sum::VARIANCE_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - ct.scale = 0; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = 0; - ct.precision = -16; // borrowed to indicate skip null value check on connector - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - //Item_func_group_concat* gc = (Item_func_group_concat*)isp; - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::VARCHAR; - ct.colWidth = isp->max_length; - ct.precision = 0; - ac->resultType(ct); - } else { - ac->resultType(parm->resultType()); + for (uint32_t i = 0; i < isp->argument_count(); i++) + { + Item* sfitemp = sfitempp[i]; + Item::Type sfitype = sfitemp->type(); + + switch (sfitype) + { + case Item::FIELD_ITEM: + { + Item_field* ifp = reinterpret_cast(sfitemp); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + + if (!sc) + { + gwi.fatalParseError = true; + break; + } + + parm.reset(sc); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); + TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); + gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); + break; + } + + case Item::INT_ITEM: + case Item::STRING_ITEM: + case Item::REAL_ITEM: + case Item::DECIMAL_ITEM: + { + // treat as count(*) + if (ac->aggOp() == AggregateColumn::COUNT) + ac->aggOp(AggregateColumn::COUNT_ASTERISK); + + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::NULL_ITEM: + { + parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::FUNC_ITEM: + { + Item_func* ifp = (Item_func*)sfitemp; + ReturnedColumn* rc = 0; + + // check count(1+1) case + vector tmpVec; + uint16_t parseInfo = 0; + parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); + + if (parseInfo & SUB_BIT) + { + gwi.fatalParseError = true; + break; + } + else if (!gwi.fatalParseError && + !(parseInfo & AGG_BIT) && + !(parseInfo & AF_BIT) && + tmpVec.size() == 0) + { + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + FunctionColumn* fc = dynamic_cast(rc); + + if ((fc && fc->functionParms().empty()) || !fc) + { + //ac->aggOp(AggregateColumn::COUNT_ASTERISK); + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (dynamic_cast(rc)) + { + //@bug5229. handle constant function on aggregate argument + ac->constCol(SRCP(rc)); + break; + } + } + } + + // MySQL carelessly allows correlated aggregate function on the WHERE clause. + // Here is the work around to deal with that inconsistence. + // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; + ClauseType clauseType = gwi.clauseType; + + if (gwi.clauseType == WHERE) + gwi.clauseType = HAVING; + + // @bug 3603. for cases like max(rand()). try to build function first. 
+ if (!rc) + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + + parm.reset(rc); + gwi.clauseType = clauseType; + + if (gwi.fatalParseError) + break; + + break; + } + + case Item::REF_ITEM: + { + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (rc) + { + parm.reset(rc); + break; + } + } + + default: + { + gwi.fatalParseError = true; + //gwi.parseErrorText = "Non-supported Item in Aggregate function"; + } + } + + if (gwi.fatalParseError) + { + if (gwi.parseErrorText.empty()) + { + Message::Args args; + + if (item->name) + args.add(item->name); + else + args.add(""); + + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); + } + + if (ac) + delete ac; + return NULL; + } + if (parm) + { + // MCOL-1201 multi-argument aggregate + ac->aggParms().push_back(parm); + } + } } - } - else - { - ac->resultType(colType_MysqlToIDB(isp)); - } - // adjust decimal result type according to internalDecimalScale - if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) - { - CalpontSystemCatalog::ColType ct = ac->resultType(); - ct.scale = gwi.internalDecimalScale; - ac->resultType(ct); - } - - // check for same aggregate on the select list - ac->expressionId(ci->expressionId++); - - if (gwi.clauseType != SELECT) - { - for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + // Get result type + // Modified for MCOL-1201 multi-argument aggregate + if (ac->aggParms().size() > 0) { - if (*ac == gwi.returnedCols[i].get()) - ac->expressionId(gwi.returnedCols[i]->expressionId()); - } - } + // These are all one parm functions, so we can safely + // use the first parm for result type. + parm = ac->aggParms()[0]; + if (isp->sum_func() == Item_sum::AVG_FUNC || + isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); - // @bug5977 @note Temporary fix to avoid mysqld crash. 
The permanent fix will - // be applied in ExeMgr. When the ExeMgr fix is available, this checking - // will be taken out. - if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::DECIMAL; + ct.colWidth = 8; + ct.scale += 4; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::COUNT_FUNC || + isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = parm->resultType().scale; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_FUNC || + isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + ct.colDataType = CalpontSystemCatalog::BIGINT; + + // no break, let fall through + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + ct.colWidth = 8; + break; + + case CalpontSystemCatalog::UTINYINT: + case 
CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::UBIGINT; + ct.colWidth = 8; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::STD_FUNC || + isp->sum_func() == Item_sum::VARIANCE_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + ct.scale = 0; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = 0; + ct.precision = -16; // borrowed to indicate skip null value check on connector + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + //Item_func_group_concat* gc = (Item_func_group_concat*)isp; + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::VARCHAR; + ct.colWidth = isp->max_length; + ct.precision = 0; + ac->resultType(ct); + } + else + { + // UDAF result type will be set below. 
+ ac->resultType(parm->resultType()); + } + } + else + { + ac->resultType(colType_MysqlToIDB(isp)); + } + + // adjust decimal result type according to internalDecimalScale + if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) + { + CalpontSystemCatalog::ColType ct = ac->resultType(); + ct.scale = gwi.internalDecimalScale; + ac->resultType(ct); + } + + // check for same aggregate on the select list + ac->expressionId(ci->expressionId++); + + if (gwi.clauseType != SELECT) + { + for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + { + if (*ac == gwi.returnedCols[i].get()) + ac->expressionId(gwi.returnedCols[i]->expressionId()); + } + } + + // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will + // be applied in ExeMgr. When the ExeMgr fix is available, this checking + // will be taken out. + if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "No project column found for aggregate function"; + if (ac) + delete ac; + return NULL; + } + else if (ac->constCol()) + { + gwi.count_asterisk_list.push_back(ac); + } + + // For UDAF, populate the context and call the UDAF init() function. + // The return type is (should be) set in context by init(). + if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + { + UDAFColumn* udafc = dynamic_cast(ac); + + if (udafc) + { + mcsv1Context& context = udafc->getContext(); + context.setName(isp->func_name()); + + // Set up the return type defaults for the call to init() + context.setResultType(udafc->resultType().colDataType); + context.setColWidth(udafc->resultType().colWidth); + context.setScale(udafc->resultType().scale); + context.setPrecision(udafc->resultType().precision); + + context.setParamCount(udafc->aggParms().size()); + ColumnDatum colType; + ColumnDatum colTypes[udafc->aggParms().size()]; + // Build the column type vector. 
+ // Modified for MCOL-1201 multi-argument aggregate + for (uint32_t i = 0; i < udafc->aggParms().size(); ++i) + { + const execplan::CalpontSystemCatalog::ColType& resultType + = udafc->aggParms()[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; + } + + // Call the user supplied init() + mcsv1sdk::mcsv1_UDAF* udaf = context.getFunction(); + if (!udaf) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "Aggregate Function " + context.getName() + " doesn't exist in the ColumnStore engine"; + if (ac) + delete ac; + return NULL; + } + if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR) + { + gwi.fatalParseError = true; + gwi.parseErrorText = udafc->getContext().getErrorMessage(); + if (ac) + delete ac; + return NULL; + } + + // UDAF_OVER_REQUIRED means that this function is for Window + // Function only. Reject it here in aggregate land. + if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, + context.getName()); + if (ac) + delete ac; + return NULL; + } + + // Set the return type as set in init() + CalpontSystemCatalog::ColType ct; + ct.colDataType = context.getResultType(); + ct.colWidth = context.getColWidth(); + ct.scale = context.getScale(); + ct.precision = context.getPrecision(); + udafc->resultType(ct); + } + } + + } + catch (std::logic_error e) { gwi.fatalParseError = true; - gwi.parseErrorText = "No project column found for aggregate function"; + gwi.parseErrorText = "error building Aggregate Function: "; + gwi.parseErrorText += e.what(); + if (ac) + delete ac; return NULL; } - else if (ac->constCol()) + catch (...) 
{ - gwi.count_asterisk_list.push_back(ac); + gwi.fatalParseError = true; + gwi.parseErrorText = "error building Aggregate Function: Unspecified exception"; + if (ac) + delete ac; + return NULL; } - - // For UDAF, populate the context and call the UDAF init() function. - if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) - { - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) - { - mcsv1Context& context = udafc->getContext(); - context.setName(isp->func_name()); - - // Set up the return type defaults for the call to init() - context.setResultType(udafc->resultType().colDataType); - context.setColWidth(udafc->resultType().colWidth); - context.setScale(udafc->resultType().scale); - context.setPrecision(udafc->resultType().precision); - - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - - // Build the column type vector. For now, there is only one - colTypes.push_back(make_pair(udafc->functionParms()->alias(), udafc->functionParms()->resultType().colDataType)); - - // Call the user supplied init() - if (context.getFunction()->init(&context, colTypes) == mcsv1_UDAF::ERROR) - { - gwi.fatalParseError = true; - gwi.parseErrorText = udafc->getContext().getErrorMessage(); - return NULL; - } - - if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) - { - gwi.fatalParseError = true; - gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, - context.getName()); - return NULL; - } - - // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; - ct.colDataType = context.getResultType(); - ct.colWidth = context.getColWidth(); - ct.scale = context.getScale(); - ct.precision = context.getPrecision(); - udafc->resultType(ct); - } - } - return ac; } @@ -7839,7 +7909,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } 
std::vector::iterator funciter; @@ -9949,7 +10019,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } std::vector::iterator funciter; diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 1ee343e90..b39da4ea2 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -781,8 +781,11 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h //double double_val = *(double*)(&value); //f2->store(double_val); - if (f2->decimals() < (uint32_t)row.getScale(s)) - f2->dec = (uint32_t)row.getScale(s); + if ((f2->decimals() == DECIMAL_NOT_SPECIFIED && row.getScale(s) > 0) + || f2->decimals() < row.getScale(s)) + { + f2->dec = row.getScale(s); + } f2->store(dl); @@ -5275,8 +5278,6 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE execplan::CalpontSelectExecutionPlan::ColumnMap::iterator colMapIter; execplan::CalpontSelectExecutionPlan::ColumnMap::iterator condColMapIter; execplan::ParseTree* ptIt; - execplan::ReturnedColumn* rcIt; - for (TABLE_LIST* tl = gi.groupByTables; tl; tl = tl->next_local) { mapiter = ci->tableMap.find(tl->table); diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 4b648cb15..8d68a6260 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -340,6 +340,7 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n ac->distinct(item_sum->has_with_distinct()); Window_spec* win_spec = wf->window_spec; SRCP srcp; + CalpontSystemCatalog::ColType ct; // For return type // arguments vector funcParms; @@ -370,18 +371,25 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n context.setColWidth(rt.colWidth); context.setScale(rt.scale); context.setPrecision(rt.precision); + 
context.setParamCount(funcParms.size()); + + mcsv1sdk::ColumnDatum colType; + mcsv1sdk::ColumnDatum colTypes[funcParms.size()]; // Turn on the Analytic flag so the function is aware it is being called // as a Window Function. context.setContextFlag(CONTEXT_IS_ANALYTIC); - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - // Build the column type vector. + // Modified for MCOL-1201 multi-argument aggregate for (size_t i = 0; i < funcParms.size(); ++i) { - colTypes.push_back(make_pair(funcParms[i]->alias(), funcParms[i]->resultType().colDataType)); + const execplan::CalpontSystemCatalog::ColType& resultType + = funcParms[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; } // Call the user supplied init() @@ -401,7 +409,6 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n } // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; ct.colDataType = context.getResultType(); ct.colWidth = context.getColWidth(); ct.scale = context.getScale(); @@ -419,10 +426,10 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n { case Item_sum::UDF_SUM_FUNC: { - uint64_t bIgnoreNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)); - char sIgnoreNulls[18]; - sprintf(sIgnoreNulls, "%lu", bIgnoreNulls); - srcp.reset(new ConstantColumn(sIgnoreNulls, (uint64_t)bIgnoreNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 1 => RESPECT + uint64_t bRespectNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) ? 0 : 1; + char sRespectNulls[18]; + sprintf(sRespectNulls, "%lu", bRespectNulls); + srcp.reset(new ConstantColumn(sRespectNulls, (uint64_t)bRespectNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 
1 => RESPECT funcParms.push_back(srcp); break; } @@ -881,11 +888,13 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n return NULL; } - ac->resultType(colType_MysqlToIDB(item_sum)); - - // bug5736. Make the result type double for some window functions when - // infinidb_double_for_decimal_math is set. - ac->adjustResultType(); + if (item_sum->sum_func() != Item_sum::UDF_SUM_FUNC) + { + ac->resultType(colType_MysqlToIDB(item_sum)); + // bug5736. Make the result type double for some window functions when + // infinidb_double_for_decimal_math is set. + ac->adjustResultType(); + } ac->expressionId(ci->expressionId++); diff --git a/utils/common/any.hpp b/utils/common/any.hpp index be0ca679b..5408c5c87 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -9,123 +9,142 @@ * http://www.boost.org/LICENSE_1_0.txt */ +#include #include namespace static_any { namespace anyimpl { + struct bad_any_cast + { + }; - struct bad_any_cast - { - }; + struct empty_any + { + }; - struct empty_any - { - }; + struct base_any_policy + { + virtual void static_delete(void** x) = 0; + virtual void copy_from_value(void const* src, void** dest) = 0; + virtual void clone(void* const* src, void** dest) = 0; + virtual void move(void* const* src, void** dest) = 0; + virtual void* get_value(void** src) = 0; + virtual size_t get_size() = 0; + }; - struct base_any_policy - { - virtual void static_delete(void** x) = 0; - virtual void copy_from_value(void const* src, void** dest) = 0; - virtual void clone(void* const* src, void** dest) = 0; - virtual void move(void* const* src, void** dest) = 0; - virtual void* get_value(void** src) = 0; - virtual size_t get_size() = 0; - }; + template + struct typed_base_any_policy : base_any_policy + { + virtual size_t get_size() + { + return sizeof(T); + } + }; - template - struct typed_base_any_policy : base_any_policy - { - virtual size_t get_size() { return sizeof(T); } - }; + template + struct small_any_policy : 
typed_base_any_policy + { + virtual void static_delete(void** x) + { + } + virtual void copy_from_value(void const* src, void** dest) + { + new(dest) T(*reinterpret_cast(src)); + } + virtual void clone(void* const* src, void** dest) + { + *dest = *src; + } + virtual void move(void* const* src, void** dest) + { + *dest = *src; + } + virtual void* get_value(void** src) + { + return reinterpret_cast(src); + } + }; - template - struct small_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) { } - virtual void copy_from_value(void const* src, void** dest) - { new(dest) T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) { *dest = *src; } - virtual void move(void* const* src, void** dest) { *dest = *src; } - virtual void* get_value(void** src) { return reinterpret_cast(src); } - }; - - template - struct big_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) + template + struct big_any_policy : typed_base_any_policy + { + virtual void static_delete(void** x) { if (*x) - delete(*reinterpret_cast(x)); + delete(*reinterpret_cast(x)); *x = NULL; } - virtual void copy_from_value(void const* src, void** dest) + virtual void copy_from_value(void const* src, void** dest) { - *dest = new T(*reinterpret_cast(src)); + *dest = new T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) + virtual void clone(void* const* src, void** dest) { - *dest = new T(**reinterpret_cast(src)); + *dest = new T(**reinterpret_cast(src)); } - virtual void move(void* const* src, void** dest) + virtual void move(void* const* src, void** dest) { - (*reinterpret_cast(dest))->~T(); - **reinterpret_cast(dest) = **reinterpret_cast(src); + (*reinterpret_cast(dest))->~T(); + **reinterpret_cast(dest) = **reinterpret_cast(src); } - virtual void* get_value(void** src) { return *src; } - }; + virtual void* get_value(void** src) + { + return *src; + } + }; - template - struct choose_policy - { - typedef 
big_any_policy type; - }; + template + struct choose_policy + { + typedef big_any_policy type; + }; - template - struct choose_policy - { - typedef small_any_policy type; - }; + template + struct choose_policy + { + typedef small_any_policy type; + }; - struct any; + struct any; - /// Choosing the policy for an any type is illegal, but should never happen. - /// This is designed to throw a compiler error. - template<> - struct choose_policy - { - typedef void type; - }; + /// Choosing the policy for an any type is illegal, but should never happen. + /// This is designed to throw a compiler error. + template<> + struct choose_policy + { + typedef void type; + }; - /// Specializations for small types. - #define SMALL_POLICY(TYPE) template<> struct \ - choose_policy { typedef small_any_policy type; }; + /// Specializations for small types. +#define SMALL_POLICY(TYPE) template<> struct \ + choose_policy { typedef small_any_policy type; }; - SMALL_POLICY(char); - SMALL_POLICY(signed char); - SMALL_POLICY(unsigned char); - SMALL_POLICY(signed short); - SMALL_POLICY(unsigned short); - SMALL_POLICY(signed int); - SMALL_POLICY(unsigned int); - SMALL_POLICY(signed long); - SMALL_POLICY(unsigned long); - SMALL_POLICY(signed long long); - SMALL_POLICY(unsigned long long); - SMALL_POLICY(float); - SMALL_POLICY(double); - SMALL_POLICY(bool); + SMALL_POLICY(char); + SMALL_POLICY(signed char); + SMALL_POLICY(unsigned char); + SMALL_POLICY(signed short); + SMALL_POLICY(unsigned short); + SMALL_POLICY(signed int); + SMALL_POLICY(unsigned int); + SMALL_POLICY(signed long); + SMALL_POLICY(unsigned long); + SMALL_POLICY(signed long long); + SMALL_POLICY(unsigned long long); + SMALL_POLICY(float); + SMALL_POLICY(double); + SMALL_POLICY(bool); - #undef SMALL_POLICY +#undef SMALL_POLICY - /// This function will return a different policy for each type. 
- template - base_any_policy* get_policy() - { - static typename choose_policy::type policy; - return &policy; - }; + /// This function will return a different policy for each type. + template + base_any_policy* get_policy() + { + static typename choose_policy::type policy; + return &policy; + }; } class any @@ -139,37 +158,40 @@ public: /// Initializing constructor. template any(const T& x) - : policy(anyimpl::get_policy()), object(NULL) + : policy(anyimpl::get_policy()), object(NULL) { assign(x); } /// Empty constructor. any() - : policy(anyimpl::get_policy()), object(NULL) - { } + : policy(anyimpl::get_policy()), object(NULL) + { + } /// Special initializing constructor for string literals. any(const char* x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Copy constructor. any(const any& x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Destructor. - ~any() { + ~any() + { policy->static_delete(&object); } /// Assignment function from another any. - any& assign(const any& x) { + any& assign(const any& x) + { reset(); policy = x.policy; policy->clone(&x.object, &object); @@ -178,7 +200,8 @@ public: /// Assignment function. template - any& assign(const T& x) { + any& assign(const T& x) + { reset(); policy = anyimpl::get_policy(); policy->copy_from_value(&x, &object); @@ -197,8 +220,42 @@ public: return assign(x); } + /// Less than operator for sorting + bool operator<(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) < 0 ? 
1 : 0; + } + return 0; + } + + /// equal operator + bool operator==(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) == 0 ? 1 : 0; + } + return 0; + } + /// Utility functions - any& swap(any& x) { + uint8_t getHash() const + { + void* p1 = const_cast(object); + return *(uint64_t*)policy->get_value(&p1) % 4048; + } + any& swap(any& x) + { std::swap(policy, x.policy); std::swap(object, x.object); return *this; @@ -206,27 +263,32 @@ public: /// Cast operator. You can only cast to the original type. template - T& cast() { - if (policy != anyimpl::get_policy()) + T& cast() + { + if (policy != anyimpl::get_policy()) throw anyimpl::bad_any_cast(); T* r = reinterpret_cast(policy->get_value(&object)); return *r; } /// Returns true if the any contains no value. - bool empty() const { + bool empty() const + { return policy == anyimpl::get_policy(); } /// Frees any allocated memory, and sets the value to NULL. - void reset() { + void reset() + { policy->static_delete(&object); policy = anyimpl::get_policy(); } /// Returns true if the two types are the same. 
- bool compatible(const any& x) const { + bool compatible(const any& x) const + { return policy == x.policy; } }; + } diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 8d110cfc8..c1f5bbd63 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -215,6 +215,22 @@ inline string getStringNullValue() namespace rowgroup { +const std::string typeStr(""); +const static_any::any& RowAggregation::charTypeId((char)1); +const static_any::any& RowAggregation::scharTypeId((signed char)1); +const static_any::any& RowAggregation::shortTypeId((short)1); +const static_any::any& RowAggregation::intTypeId((int)1); +const static_any::any& RowAggregation::longTypeId((long)1); +const static_any::any& RowAggregation::llTypeId((long long)1); +const static_any::any& RowAggregation::ucharTypeId((unsigned char)1); +const static_any::any& RowAggregation::ushortTypeId((unsigned short)1); +const static_any::any& RowAggregation::uintTypeId((unsigned int)1); +const static_any::any& RowAggregation::ulongTypeId((unsigned long)1); +const static_any::any& RowAggregation::ullTypeId((unsigned long long)1); +const static_any::any& RowAggregation::floatTypeId((float)1); +const static_any::any& RowAggregation::doubleTypeId((double)1); +const static_any::any& RowAggregation::strTypeId(typeStr); + KeyStorage::KeyStorage(const RowGroup& keys, Row** tRow) : tmpRow(tRow), rg(keys) { RGData data(rg); @@ -691,7 +707,8 @@ RowAggregation::RowAggregation(const vector& rowAggGroupByCol RowAggregation::RowAggregation(const RowAggregation& rhs): fAggMapPtr(NULL), fRowGroupOut(NULL), fTotalRowCount(0), fMaxTotalRowCount(AGG_ROWGROUP_SIZE), - fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0) + fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0), + fRGContext(rhs.fRGContext) { //fGroupByCols.clear(); //fFunctionCols.clear(); @@ -756,7 +773,6 @@ void RowAggregation::addRowGroup(const RowGroup* pRows, vector& in { // this function is 
for threaded aggregation, which is for group by and distinct. // if (countSpecial(pRows)) - Row rowIn; pRows->initRow(&rowIn); @@ -790,7 +806,7 @@ void RowAggregation::setJoinRowGroups(vector* pSmallSideRG, RowGroup* } //------------------------------------------------------------------------------ -// For UDAF, we need to sometimes start a new context. +// For UDAF, we need to sometimes start a new fRGContext. // // This will be called any number of times by each of the batchprimitiveprocessor // threads on the PM and by multple threads on the UM. It must remain @@ -801,29 +817,29 @@ void RowAggregation::resetUDAF(uint64_t funcColID) // Get the UDAF class pointer and store in the row definition object. RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColID].get()); - // resetUDAF needs to be re-entrant. Since we're modifying the context object - // by creating a new userData, we need a local copy. The copy constructor - // doesn't copy userData. - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + // RowAggregation and it's functions need to be re-entrant which means + // each instance (thread) needs its own copy of the context object. + // Note: operator=() doesn't copy userData. + fRGContext = rowUDAF->fUDAFContext; // Call the user reset for the group userData. Since, at this point, // context's userData will be NULL, reset will generate a new one. 
mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->reset(&rgContext); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } fRow.setUserDataStore(fRowGroupOut->getRGData()->getUserDataStore()); - fRow.setUserData(rgContext, - rgContext.getUserDataSP(), - rgContext.getUserDataSize(), + fRow.setUserData(fRGContext, + fRGContext.getUserDataSP(), + fRGContext.getUserDataSize(), rowUDAF->fAuxColumnIndex); - rgContext.setUserData(NULL); // Prevents calling deleteUserData on the context. + fRGContext.setUserData(NULL); // Prevents calling deleteUserData on the fRGContext. } //------------------------------------------------------------------------------ @@ -873,7 +889,6 @@ void RowAggregation::initialize() } } - // Save the RowGroup data pointer fResultDataVec.push_back(fRowGroupOut->getRGData()); @@ -1658,10 +1673,11 @@ void RowAggregation::updateEntry(const Row& rowIn) { for (uint64_t i = 0; i < fFunctionCols.size(); i++) { - int64_t colIn = fFunctionCols[i]->fInputColumnIndex; - int64_t colOut = fFunctionCols[i]->fOutputColumnIndex; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[i]; + int64_t colIn = pFunctionCol->fInputColumnIndex; + int64_t colOut = pFunctionCol->fOutputColumnIndex; - switch (fFunctionCols[i]->fAggFunction) + switch (pFunctionCol->fAggFunction) { case ROWAGG_COUNT_COL_NAME: @@ -1675,7 +1691,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_MIN: case ROWAGG_MAX: case ROWAGG_SUM: - doMinMaxSum(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction); + doMinMaxSum(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; case ROWAGG_AVG: @@ -1692,7 +1708,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_BIT_OR: case ROWAGG_BIT_XOR: { - doBitOp(rowIn, 
colIn, colOut, fFunctionCols[i]->fAggFunction); + doBitOp(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; } @@ -1707,11 +1723,11 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); + RowUDAFFunctionCol* rowUDAF = dynamic_cast(pFunctionCol.get()); if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF); + doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF, i); } else { @@ -1725,7 +1741,7 @@ void RowAggregation::updateEntry(const Row& rowIn) { std::ostringstream errmsg; errmsg << "RowAggregation: function (id = " << - (uint64_t) fFunctionCols[i]->fAggFunction << ") is not supported."; + (uint64_t) pFunctionCol->fAggFunction << ") is not supported."; cerr << errmsg.str() << endl; throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); break; @@ -1997,131 +2013,142 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu } void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { - std::vector valsIn; - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupIn.getColTypes()[colIn]; - std::vector dataFlags; + int32_t paramCount = fRGContext.getParameterCount(); + // The vector of parameters to be sent to the UDAF + mcsv1sdk::ColumnDatum valsIn[paramCount]; + uint32_t dataFlags[paramCount]; - // Get the context for this rowGroup. Make a copy so we're thread safe. 
- mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); - - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - - if (isNull(&fRowGroupIn, rowIn, colIn) == true) + execplan::CalpontSystemCatalog::ColDataType colDataType; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + if (isNull(&fRowGroupIn, rowIn, colIn) == true) { - return; + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + { + return; + } + dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; + } + + colDataType = fRowGroupIn.getColTypes()[colIn]; + if (!fRGContext.isParamNull(i)) + { + switch (colDataType) + { + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + break; + } + + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UBIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + { + datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; + datum.columnData = rowIn.getDoubleField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + { + 
datum.dataType = execplan::CalpontSystemCatalog::FLOAT; + datum.columnData = rowIn.getFloatField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::TIME: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + { + datum.dataType = colDataType; + datum.columnData = rowIn.getStringField(colIn); + break; + } + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation " << fRGContext.getName() << + ": No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } } - flag |= mcsv1sdk::PARAM_IS_NULL; - } - - flags.push_back(flag); - rgContext.setDataFlags(&flags); - - mcsv1sdk::ColumnDatum datum; - - if (!rgContext.isParamNull(0)) - { - switch (colDataType) + // MCOL-1201: If there are multiple parameters, the next fFunctionCols + // will have the column used. By incrementing the funcColsIdx (passed by + // ref, we also increment the caller's index. 
+ if (fFunctionCols.size() > funcColsIdx + 1 + && fFunctionCols[funcColsIdx+1]->fAggFunction == ROWAGG_MULTI_PARM) { - case execplan::CalpontSystemCatalog::TINYINT: - case execplan::CalpontSystemCatalog::SMALLINT: - case execplan::CalpontSystemCatalog::MEDINT: - case execplan::CalpontSystemCatalog::INT: - case execplan::CalpontSystemCatalog::BIGINT: - case execplan::CalpontSystemCatalog::DECIMAL: - case execplan::CalpontSystemCatalog::UDECIMAL: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - datum.scale = fRowGroupIn.getScale()[colIn]; - datum.precision = fRowGroupIn.getPrecision()[colIn]; - break; - } - - case execplan::CalpontSystemCatalog::UTINYINT: - case execplan::CalpontSystemCatalog::USMALLINT: - case execplan::CalpontSystemCatalog::UMEDINT: - case execplan::CalpontSystemCatalog::UINT: - case execplan::CalpontSystemCatalog::UBIGINT: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DOUBLE: - case execplan::CalpontSystemCatalog::UDOUBLE: - { - datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; - datum.columnData = rowIn.getDoubleField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::FLOAT: - case execplan::CalpontSystemCatalog::UFLOAT: - { - datum.dataType = execplan::CalpontSystemCatalog::FLOAT; - datum.columnData = rowIn.getFloatField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DATE: - case execplan::CalpontSystemCatalog::DATETIME: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::TIME: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::CHAR: - case execplan::CalpontSystemCatalog::VARCHAR: - case 
execplan::CalpontSystemCatalog::TEXT: - case execplan::CalpontSystemCatalog::VARBINARY: - case execplan::CalpontSystemCatalog::CLOB: - case execplan::CalpontSystemCatalog::BLOB: - { - datum.dataType = colDataType; - datum.columnData = rowIn.getStringField(colIn); - break; - } - - default: - { - std::ostringstream errmsg; - errmsg << "RowAggregation " << rgContext.getName() << - ": No logic for data type: " << colDataType; - throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); - break; - } + ++funcColsIdx; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; + colIn = pFunctionCol->fInputColumnIndex; + colOut = pFunctionCol->fOutputColumnIndex; + } + else + { + break; } } - valsIn.push_back(datum); - // The intermediate values are stored in userData referenced by colAux. - rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setDataFlags(dataFlags); + fRGContext.setUserData(fRow.getUserData(colAux)); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->nextValue(&rgContext, valsIn); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -2218,6 +2245,7 @@ RowAggregationUM::RowAggregationUM(const RowAggregationUM& rhs) : fHasAvg(rhs.fHasAvg), fKeyOnHeap(rhs.fKeyOnHeap), fHasStatsFunc(rhs.fHasStatsFunc), + fHasUDAF(rhs.fHasUDAF), fExpression(rhs.fExpression), fTotalMemUsage(rhs.fTotalMemUsage), fRm(rhs.fRm), @@ -2419,7 +2447,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -2585,22 +2613,6 @@ void RowAggregationUM::calculateAvgColumns() // Sets the value 
from valOut into column colOut, performing any conversions. void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) { - static const static_any::any& charTypeId((char)1); - static const static_any::any& scharTypeId((signed char)1); - static const static_any::any& shortTypeId((short)1); - static const static_any::any& intTypeId((int)1); - static const static_any::any& longTypeId((long)1); - static const static_any::any& llTypeId((long long)1); - static const static_any::any& ucharTypeId((unsigned char)1); - static const static_any::any& ushortTypeId((unsigned short)1); - static const static_any::any& uintTypeId((unsigned int)1); - static const static_any::any& ulongTypeId((unsigned long)1); - static const static_any::any& ullTypeId((unsigned long long)1); - static const static_any::any& floatTypeId((float)1); - static const static_any::any& doubleTypeId((double)1); - static const std::string typeStr(""); - static const static_any::any& strTypeId(typeStr); - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; if (valOut.empty()) @@ -2609,6 +2621,179 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) return; } + int64_t intOut = 0; + uint64_t uintOut = 0; + float floatOut = 0.0; + double doubleOut = 0.0; + ostringstream oss; + std::string strOut; + + bool bSetSuccess = false; + switch (colDataType) + { + case execplan::CalpontSystemCatalog::BIT: + case execplan::CalpontSystemCatalog::TINYINT: + if (valOut.compatible(charTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(scharTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<1>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + if (valOut.compatible(shortTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<2>(intOut, colOut); + bSetSuccess = true; 
+ } + break; + + case execplan::CalpontSystemCatalog::INT: + if (valOut.compatible(uintTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(longTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<4>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + if (valOut.compatible(llTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<8>(intOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UTINYINT: + if (valOut.compatible(ucharTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<1>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + if (valOut.compatible(ushortTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<2>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UINT: + if (valOut.compatible(uintTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<4>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UBIGINT: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + if (valOut.compatible(floatTypeId)) + { + floatOut = valOut.cast(); + fRow.setFloatField(floatOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DOUBLE: + case 
execplan::CalpontSystemCatalog::UDOUBLE: + if (valOut.compatible(doubleTypeId)) + { + doubleOut = valOut.cast(); + fRow.setDoubleField(doubleOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setStringField(strOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setVarBinaryField(strOut, colOut); + bSetSuccess = true; + } + break; + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation: No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } + if (!bSetSuccess) + { + SetUDAFAnyValue(valOut, colOut); + } +} + +void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut) +{ + execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; + // This may seem a bit convoluted. Users shouldn't return a type // that they didn't set in mcsv1_UDAF::init(), but this // handles whatever return type is given and casts @@ -2814,7 +2999,7 @@ void RowAggregationUM::calculateUDAFColumns() continue; rowUDAF = dynamic_cast(fFunctionCols[i].get()); - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + fRGContext = rowUDAF->fUDAFContext; int64_t colOut = rowUDAF->fOutputColumnIndex; int64_t colAux = rowUDAF->fAuxColumnIndex; @@ -2826,26 +3011,26 @@ void RowAggregationUM::calculateUDAFColumns() fRowGroupOut->getRow(j, &fRow); // Turn the NULL flag off. We can't know NULL at this point - rgContext.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF evaluate function mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->evaluate(&rgContext, valOut); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); } - rgContext.setUserData(NULL); + fRGContext.setUserData(NULL); } } @@ -3116,54 +3301,60 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u { // For a NULL constant, call nextValue with NULL and then evaluate. bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. 
- rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } - +#if 0 + uint32_t dataFlags[fRGContext.getParameterCount()]; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + { + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + } +#endif // Turn the NULL and CONSTANT flags on. - std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a dummy datum - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = execplan::CalpontSystemCatalog::BIGINT; datum.columnData = 0; - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), 
logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3460,30 +3651,28 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData case ROWAGG_UDAF: { bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. - rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Turn the CONSTANT flags on. 
- std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a datum item for sending to UDAF - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = (CalpontSystemCatalog::ColDataType)colDataType; switch (colDataType) @@ -3567,27 +3756,27 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData break; } - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3806,7 +3995,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -4011,45 +4200,43 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance 
//------------------------------------------------------------------------------ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { static_any::any valOut; - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); // Get the user data boost::shared_ptr userData = rowIn.getUserData(colIn + 1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. - std::vector flags; - uint32_t flag = 0; + uint32_t flags[1]; + flags[0] = 0; if (!userData) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { return; } // Turn on NULL flags - flag |= mcsv1sdk::PARAM_IS_NULL; + flags[0] |= mcsv1sdk::PARAM_IS_NULL; } - flags.push_back(flag); - rgContext.setDataFlags(&flags); + fRGContext.setDataFlags(flags); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF subEvaluate method mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->subEvaluate(&rgContext, userData.get()); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userData.get()); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::IDBExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -4246,7 +4433,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index b6294f193..282f354fc 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -110,6 +110,9 @@ enum RowAggFunctionType // User Defined Aggregate Function ROWAGG_UDAF, + // If an Aggregate has more than one parameter, this will be used for parameters after the first + ROWAGG_MULTI_PARM, + // internal function type to avoid duplicate the work // handling ROWAGG_COUNT_NO_OP, ROWAGG_DUP_FUNCT and ROWAGG_DUP_AVG is a little different // ROWAGG_COUNT_NO_OP : count done by AVG, no need to copy @@ -583,7 +586,7 @@ protected: virtual void doAvg(const Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); - virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); @@ -660,6 +663,25 @@ 
protected: //need access to rowgroup storage holding the rows to hash & ==. friend class AggHasher; friend class AggComparator; + + // We need a separate copy for each thread. + mcsv1sdk::mcsv1Context fRGContext; + + // These are handy for testing the actual type of static_any for UDAF + static const static_any::any& charTypeId; + static const static_any::any& scharTypeId; + static const static_any::any& shortTypeId; + static const static_any::any& intTypeId; + static const static_any::any& longTypeId; + static const static_any::any& llTypeId; + static const static_any::any& ucharTypeId; + static const static_any::any& ushortTypeId; + static const static_any::any& uintTypeId; + static const static_any::any& ulongTypeId; + static const static_any::any& ullTypeId; + static const static_any::any& floatTypeId; + static const static_any::any& doubleTypeId; + static const static_any::any& strTypeId; }; //------------------------------------------------------------------------------ @@ -783,6 +805,9 @@ protected: // Sets the value from valOut into column colOut, performing any conversions. void SetUDAFValue(static_any::any& valOut, int64_t colOut); + // If the datatype returned by evaluate isn't what we expect, convert. + void SetUDAFAnyValue(static_any::any& valOut, int64_t colOut); + // calculate the UDAF function all rows received. UM only function. 
void calculateUDAFColumns(); @@ -877,7 +902,7 @@ protected: void doStatistics(const Row&, int64_t, int64_t, int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); - void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); bool countSpecial(const RowGroup* pRG) { return false; diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index e69ff4d88..01009e35a 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/udfsdk/allnull.cpp b/utils/udfsdk/allnull.cpp index b6b8d79da..247b9e28f 100644 --- a/utils/udfsdk/allnull.cpp +++ b/utils/udfsdk/allnull.cpp @@ -27,11 +27,11 @@ struct allnull_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode allnull::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { context->setUserDataSize(sizeof(allnull_data)); - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -52,8 +52,7 @@ mcsv1_UDAF::ReturnCode allnull::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { struct allnull_data* data = (struct allnull_data*)context->getUserData()->data; diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h index 86697b052..da17f5d6b 100644 
--- a/utils/udfsdk/allnull.h +++ b/utils/udfsdk/allnull.h @@ -103,7 +103,7 @@ public: * colTypes or wrong number of arguments. Else return * mcsv1_UDAF::SUCCESS. */ - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); /** * reset() @@ -138,7 +138,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() diff --git a/utils/udfsdk/avg_mode.cpp b/utils/udfsdk/avg_mode.cpp index f39b5e402..5429183d9 100644 --- a/utils/udfsdk/avg_mode.cpp +++ b/utils/udfsdk/avg_mode.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("avg_mode() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode avg_mode::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; @@ -187,8 +186,7 @@ mcsv1_UDAF::ReturnCode 
avg_mode::evaluate(mcsv1Context* context, static_any::any return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/avg_mode.h b/utils/udfsdk/avg_mode.h index 4f3442005..5722c5fea 100644 --- a/utils/udfsdk/avg_mode.h +++ b/utils/udfsdk/avg_mode.h @@ -18,7 +18,7 @@ /*********************************************************************** * $Id$ * -* mcsv1_UDAF.h +* avg_mode.h ***********************************************************************/ /** @@ -50,8 +50,8 @@ * is also used to describe the interface that is used for * either. */ -#ifndef HEADER_mode -#define HEADER_mode +#ifndef HEADER_avg_mode +#define HEADER_avg_mode #include #include @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index 349a642ec..ee08dcc07 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -36,6 +36,8 @@ UDAF_MAP UDAFMap::fm; #include "ssq.h" #include "median.h" #include "avg_mode.h" +#include "regr_avgx.h" +#include "avgx.h" UDAF_MAP& UDAFMap::getMap() { if (fm.size() > 0) @@ -52,6 +54,8 @@ UDAF_MAP& UDAFMap::getMap() fm["ssq"] = new ssq(); fm["median"] = new median(); fm["avg_mode"] = new avg_mode(); + fm["regr_avgx"] = new regr_avgx(); + fm["avgx"] = new avgx(); return fm; } @@ -115,8 +119,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const { // We don't test the per row data fields. They don't determine // if it's the same Context. - if (getName() != c.getName() - || fRunFlags != c.fRunFlags + if (getName() != c.getName() + ||fRunFlags != c.fRunFlags || fContextFlags != c.fContextFlags || fUserDataSize != c.fUserDataSize || fResultType != c.fResultType @@ -125,7 +129,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const || fStartFrame != c.fStartFrame || fEndFrame != c.fEndFrame || fStartConstant != c.fStartConstant - || fEndConstant != c.fEndConstant) + || fEndConstant != c.fEndConstant + || fParamCount != c.fParamCount) return false; return true; @@ -217,6 +222,7 @@ void mcsv1Context::serialize(messageqcpp::ByteStream& b) const b << (uint32_t)fEndFrame; b << fStartConstant; b << fEndConstant; + b << fParamCount; } void mcsv1Context::unserialize(messageqcpp::ByteStream& b) @@ -238,6 +244,7 @@ void mcsv1Context::unserialize(messageqcpp::ByteStream& b) fEndFrame = (WF_FRAME)frame; b >> fStartConstant; b >> fEndConstant; + b >> fParamCount; } void UserData::serialize(messageqcpp::ByteStream& bs) const diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h 
index d24852c28..df3f47649 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -77,6 +77,7 @@ #include "any.hpp" #include "calpontsystemcatalog.h" #include "wf_frame.h" +#include "my_decimal_limits.h" using namespace execplan; @@ -200,12 +201,8 @@ static uint64_t CONTEXT_IS_PM __attribute__ ((unused)) = 1 << 2; // Flags that describe the contents of a specific input parameter // These will be set in context->dataFlags for each method call by the framework. // User code shouldn't use these directly -static uint64_t PARAM_IS_NULL __attribute__ ((unused)) = 1; -static uint64_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; - -// shorthand for the list of columns in the call sent to init() -// first is the actual column name and second is the data type in Columnstore. -typedef std::vector >COL_TYPES; +static uint32_t PARAM_IS_NULL __attribute__ ((unused)) = 1; +static uint32_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; // This is the context class that is passed to all API callbacks // The framework potentially sets data here for each invocation of @@ -269,7 +266,9 @@ public: EXPORT bool isPM(); // Parameter refinement description accessors - // valid in nextValue and dropValue + + // How many actual parameters were entered. + // valid in all calls size_t getParameterCount() const; // Determine if an input parameter is NULL @@ -298,6 +297,7 @@ public: // This only makes sense if the return type is decimal, but should be set // to (0, -1) for other types if the inout is decimal. // valid in init() + // Set the scale to DECIMAL_NOT_SPECIFIED if you want a floating decimal. 
EXPORT bool setScale(int32_t scale); EXPORT bool setPrecision(int32_t precision); @@ -372,7 +372,7 @@ private: int32_t fResultscale; // For scale, the number of digits to the right of the decimal int32_t fResultPrecision; // The max number of digits allowed in the decimal value std::string errorMsg; - std::vector* dataFlags; // one entry for each parameter + uint32_t* dataFlags; // an integer array with one entry for each parameter bool* bInterrupted; // Gets set to true by the Framework if something happens WF_FRAME fStartFrame; // Is set to default to start, then modified by the actual frame in the call WF_FRAME fEndFrame; // Is set to default to start, then modified by the actual frame in the call @@ -380,6 +380,7 @@ private: int32_t fEndConstant; // for end frame WF_PRECEEDIMG or WF_FOLLOWING std::string functionName; mcsv1sdk::mcsv1_UDAF* func; + int32_t fParamCount; public: // For use by the framework @@ -394,13 +395,14 @@ public: EXPORT void clearContextFlag(uint64_t flag); EXPORT uint64_t getContextFlags() const; EXPORT uint32_t getUserDataSize() const; - EXPORT std::vector& getDataFlags(); - EXPORT void setDataFlags(std::vector* flags); + EXPORT uint32_t* getDataFlags(); + EXPORT void setDataFlags(uint32_t* flags); EXPORT void setInterrupted(bool interrupted); EXPORT void setInterrupted(bool* interrupted); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction(); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const; EXPORT boost::shared_ptr getUserDataSP(); + EXPORT void setParamCount(int32_t paramCount); }; // Since aggregate functions can operate on any data type, we use the following structure @@ -419,9 +421,10 @@ public: struct ColumnDatum { CalpontSystemCatalog::ColDataType dataType; // defined in calpontsystemcatalog.h - static_any::any columnData; + static_any::any columnData; // Not valid in init() uint32_t scale; // If dataType is a DECIMAL type uint32_t precision; // If dataType is a DECIMAL type + std::string alias; // Only filled in for init() ColumnDatum() : 
dataType(CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1) {}; }; @@ -466,7 +469,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes) = 0; + ColumnDatum* colTypes) = 0; /** * reset() @@ -501,8 +504,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn) = 0; + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn) = 0; /** * subEvaluate() @@ -579,8 +581,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() @@ -640,32 +641,32 @@ inline mcsv1Context::mcsv1Context() : fEndFrame(WF_CURRENT_ROW), fStartConstant(0), fEndConstant(0), - func(NULL) + func(NULL), + fParamCount(0) { } inline mcsv1Context::mcsv1Context(const mcsv1Context& rhs) : - fContextFlags(0), - fColWidth(0), - dataFlags(NULL), - bInterrupted(NULL), - func(NULL) + dataFlags(NULL) { copy(rhs); } inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs) { - fRunFlags = rhs.getRunFlags(); - fResultType = rhs.getResultType(); - fUserDataSize = rhs.getUserDataSize(); - fResultscale = rhs.getScale(); - fResultPrecision = rhs.getPrecision(); + fRunFlags = rhs.fRunFlags; + fContextFlags = rhs.fContextFlags; + fResultType = rhs.fResultType; + fUserDataSize = rhs.fUserDataSize; + fColWidth = rhs.fColWidth; + fResultscale = rhs.fResultscale; + fResultPrecision = rhs.fResultPrecision; rhs.getStartFrame(fStartFrame, fStartConstant); rhs.getEndFrame(fEndFrame, fEndConstant); - functionName = rhs.getName(); - bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference - func = rhs.func; + functionName = rhs.functionName; + bInterrupted = rhs.bInterrupted; // Multiple 
threads will use the same reference + func = rhs.func; + fParamCount = rhs.fParamCount; return *this; } @@ -675,11 +676,7 @@ inline mcsv1Context::~mcsv1Context() inline mcsv1Context& mcsv1Context::operator=(const mcsv1Context& rhs) { - fContextFlags = 0; - fColWidth = 0; dataFlags = NULL; - bInterrupted = NULL; - func = NULL; return copy(rhs); } @@ -753,16 +750,13 @@ inline bool mcsv1Context::isPM() inline size_t mcsv1Context::getParameterCount() const { - if (dataFlags) - return dataFlags->size(); - - return 0; + return fParamCount; } inline bool mcsv1Context::isParamNull(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_NULL; + return dataFlags[paramIdx] & PARAM_IS_NULL; return false; } @@ -770,7 +764,7 @@ inline bool mcsv1Context::isParamNull(int paramIdx) inline bool mcsv1Context::isParamConstant(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_CONSTANT; + return dataFlags[paramIdx] & PARAM_IS_CONSTANT; return false; } @@ -939,18 +933,22 @@ inline uint32_t mcsv1Context::getUserDataSize() const return fUserDataSize; } -inline std::vector& mcsv1Context::getDataFlags() +inline uint32_t* mcsv1Context::getDataFlags() { - return *dataFlags; + return dataFlags; } -inline void mcsv1Context::setDataFlags(std::vector* flags) +inline void mcsv1Context::setDataFlags(uint32_t* flags) { dataFlags = flags; } -inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, - std::vector& valsDropped) +inline void mcsv1Context::setParamCount(int32_t paramCount) +{ + fParamCount = paramCount; +} + +inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { return NOT_IMPLEMENTED; } diff --git a/utils/udfsdk/median.cpp b/utils/udfsdk/median.cpp index e32d721f1..9c7e72dc3 100644 --- a/utils/udfsdk/median.cpp +++ b/utils/udfsdk/median.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* 
colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("median() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode median::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; @@ -212,8 +211,7 @@ mcsv1_UDAF::ReturnCode median::evaluate(mcsv1Context* context, static_any::any& return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h index d64792461..142be6ba8 100644 --- a/utils/udfsdk/median.h +++ b/utils/udfsdk/median.h @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. 
*/ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/ssq.cpp b/utils/udfsdk/ssq.cpp index 4d9ef7e10..20fdc33db 100644 --- a/utils/udfsdk/ssq.cpp +++ b/utils/udfsdk/ssq.cpp @@ -34,9 +34,9 @@ struct ssq_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -44,13 +44,13 @@ mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("ssq() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -81,8 +81,7 @@ mcsv1_UDAF::ReturnCode ssq::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; @@ -183,8 +182,7 @@ mcsv1_UDAF::ReturnCode ssq::evaluate(mcsv1Context* context, static_any::any& val return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode 
ssq::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h index 514c7a3f0..2cac61c2c 100644 --- a/utils/udfsdk/ssq.h +++ b/utils/udfsdk/ssq.h @@ -114,7 +114,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -147,8 +147,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -224,8 +223,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); protected: }; diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp index 981651c43..dc0277ccc 100644 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -490,6 +490,168 @@ extern "C" // return data->sumsq; return 0; } + +//======================================================================= + + /** + * regr_avgx connector stub + */ + struct regr_avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_avgx_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_avgx() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + 
data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x and y. x is taken to be the second argument of regr_avgx(y, x), so its type and its value are both read at index 1 + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } + +//======================================================================= + + /** + * avgx connector stub. Exactly the same functionality as the + * built in avg() function.
Use to test the performance of the + * API + */ + struct avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct avgx_data* data; + if (args->arg_count != 1) + { + strcpy(message,"avgx() requires one argument"); + return 1; + } + + if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x. avgx() takes exactly one argument (enforced in avgx_init), so its type and its value are both read at index 0 + struct avgx_data* data = (struct avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[0], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } } // vim:ts=4 sw=4: diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index 664b0e7de..fe1f3fd0e 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -204,8 +204,10 @@ Filters="*.c;*.C;*.cc;*.cpp;*.cp;*.cxx;*.c++;*.prg;*.pas;*.dpr;*.asm;*.s;*.bas;*.java;*.cs;*.sc;*.e;*.cob;*.html;*.rc;*.tcl;*.py;*.pl;*.d"> + + @@ -215,8 +217,10 @@
Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + + diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index f302c49cd..5cd5243c5 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -52,6 +52,7 @@ using namespace joblist; namespace windowfunction { + template boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) { @@ -142,7 +143,7 @@ template void WF_udaf::resetData() { getContext().getFunction()->reset(&getContext()); - fSet.clear(); + fDistinctSet.clear(); WindowFunctionType::resetData(); } @@ -150,8 +151,8 @@ template void WF_udaf::parseParms(const std::vector& parms) { bRespectNulls = true; - // parms[1]: respect null | ignore null - ConstantColumn* cc = dynamic_cast(parms[1].get()); + // The last parms: respect null | ignore null + ConstantColumn* cc = dynamic_cast(parms[parms.size()-1].get()); idbassert(cc != NULL); bool isNull = false; // dummy, harded coded bRespectNulls = (cc->getIntVal(fRow, isNull) > 0); @@ -167,52 +168,71 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) } mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - uint64_t colOut = fFieldIndex[0]; - uint64_t colIn = fFieldIndex[1]; - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. 
+ getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); + + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } for (int64_t i = b; i < e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; + bool bHasNull = false; fRow.setData(getPointer(fRowData->at(i))); // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; + uint32_t flags[getContext().getParameterCount()]; - if (fRow.isNullValue(colIn) == true) + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) { - continue; + if (!bRespectNulls) + { + bHasNull = true; + break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - flag |= mcsv1sdk::PARAM_IS_NULL; + T valIn; + getValue(colIn, valIn, &datum.dataType); + + // Check for distinct, if turned on. + // Currently, distinct only works for param 1 + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. - // TODO: when we impliment distinct, we need to revist this. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) + if (bHasNull) { continue; } - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - rc = getContext().getFunction()->dropValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED) @@ -442,59 +462,191 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) else if (fPrev <= e && fPrev > c) e = c; - uint64_t colIn = fFieldIndex[1]; + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. + getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } if (b <= c && c <= e) getContext().setContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); else getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); - + bool bHasNull = false; for (int64_t i = b; i <= e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; fRow.setData(getPointer(fRowData->at(i))); - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - if (fRow.isNullValue(colIn) == true) + // NULL flags + uint32_t flags[getContext().getParameterCount()]; + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + + // Turn on Null flags or skip based on respect nulls + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) + { + if (!bRespectNulls) + { + bHasNull = true; + 
break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; + } + + // MCOL-1201 Multi-Paramter calls + switch (datum.dataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + { + int64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::UDECIMAL: + { + uint64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + double valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + float valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[i] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } + } + // Skip if any value is NULL and respect nulls is off. + if (bHasNull) { continue; } - - flag |= mcsv1sdk::PARAM_IS_NULL; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) - { - continue; - } - - if (fDistinct) - fSet.insert(valIn); - - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - + getContext().setDataFlags(flags); + rc = getContext().getFunction()->nextValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index babb32565..f7a4c4b08 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -21,13 +21,35 @@ #ifndef UTILS_WF_UDAF_H #define UTILS_WF_UDAF_H -#include +#ifndef _MSC_VER +#include +#else +#include +#endif #include "windowfunctiontype.h" #include "mcsv1_udaf.h" namespace windowfunction { +// Hash and equality functors for the distinct hashmap; both take their operands by const reference so they can be called on const keys +class DistinctHasher +{ +public: + inline size_t operator()(const static_any::any& a) const + { + return a.getHash(); + } +}; + +class DistinctEqual +{ +public: + inline bool operator()(const static_any::any& lhs, const static_any::any& rhs) const + { + return lhs == rhs; + } +}; // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF @@ -72,7 +94,8 @@ protected: bool fDistinct; bool bRespectNulls; // respect null | ignore null bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue.
- std::set fSet; // To hold distinct values + // To hold distinct values + std::tr1::unordered_set fDistinctSet; static_any::any fValOut; // The return value public: diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index 950045899..4c5b4de32 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -492,10 +492,10 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) static uint64_t dateNull = joblist::DATENULL; static uint64_t datetimeNull = joblist::DATETIMENULL; static uint64_t timeNull = joblist::TIMENULL; - static uint64_t char1Null = joblist::CHAR1NULL; - static uint64_t char2Null = joblist::CHAR2NULL; - static uint64_t char4Null = joblist::CHAR4NULL; - static uint64_t char8Null = joblist::CHAR8NULL; +// static uint64_t char1Null = joblist::CHAR1NULL; +// static uint64_t char2Null = joblist::CHAR2NULL; +// static uint64_t char4Null = joblist::CHAR4NULL; +// static uint64_t char8Null = joblist::CHAR8NULL; static string stringNull(""); void* v = NULL; diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 7cd275021..71d0e1fbd 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1280,7 +1280,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -2025,10 +2025,10 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 0; k < colStructList.size(); k++) + for (size_t k = 0; k < colStructList.size(); k++) { // Skip the selected column 
- if (k == colId) + if (k == (size_t)colId) continue; Column expandCol; @@ -2583,7 +2583,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -3278,7 +3278,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)];