From d010397be647a540189687434c57084bed1e7938 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Wed, 9 May 2018 14:52:42 +0100 Subject: [PATCH 001/123] MCOL-1197 Make -c work in cpimport It turns out -c wasn't actually connected to anything and now that we have BLOB/TEXT it is pretty useful. If -c is set to < 1MB then 1MB is used, otherwise it will use the selected buffer size. --- writeengine/splitter/we_cmdargs.h | 1 + writeengine/splitter/we_filereadthread.cpp | 18 ++++++++++++++---- writeengine/splitter/we_filereadthread.h | 5 +++-- writeengine/splitter/we_sdhandler.cpp | 7 +++++++ writeengine/splitter/we_sdhandler.h | 1 + 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/writeengine/splitter/we_cmdargs.h b/writeengine/splitter/we_cmdargs.h index 96e06a4bc..803f77c54 100644 --- a/writeengine/splitter/we_cmdargs.h +++ b/writeengine/splitter/we_cmdargs.h @@ -77,6 +77,7 @@ class WECmdArgs char getDelimChar() { return fColDelim; } ImportDataMode getImportDataMode() const { return fImportDataMode; } bool getConsoleLog() { return fConsoleLog; } + int getReadBufSize() { return fReadBufSize; } bool isCpimportInvokeMode(){return (fBlockMode3)? 
false : fCpiInvoke;} bool isQuiteMode() const { return fQuiteMode; } diff --git a/writeengine/splitter/we_filereadthread.cpp b/writeengine/splitter/we_filereadthread.cpp index 6e4fcb4a7..185557965 100644 --- a/writeengine/splitter/we_filereadthread.cpp +++ b/writeengine/splitter/we_filereadthread.cpp @@ -87,6 +87,15 @@ WEFileReadThread::WEFileReadThread(WESDHandler& aSdh):fSdh(aSdh), { //TODO batch qty to get from config fBatchQty = 10000; + if (fSdh.getReadBufSize() < DEFAULTBUFFSIZE) + { + fBuffSize = DEFAULTBUFFSIZE; + } + else + { + fBuffSize = fSdh.getReadBufSize(); + } + fBuff = new char [fBuffSize]; } @@ -106,6 +115,7 @@ WEFileReadThread::~WEFileReadThread() delete fpThread; } fpThread=0; + delete []fBuff; //cout << "WEFileReadThread destructor called" << endl; } @@ -330,16 +340,16 @@ unsigned int WEFileReadThread::readDataFile(messageqcpp::SBS& Sbs) if(fEnclEsc) { //pStart = aBuff; - aLen = getNextRow(fInFile, fBuff, sizeof(fBuff)-1); + aLen = getNextRow(fInFile, fBuff, fBuffSize-1); } else { - fInFile.getline(fBuff, sizeof(fBuff)-1); + fInFile.getline(fBuff, fBuffSize-1); aLen=fInFile.gcount(); } ////aLen chars incl \n, Therefore aLen-1; '<<' oper won't go past it //cout << "Data Length " << aLen <0)) + if((aLen < (fBuffSize-2)) && (aLen>0)) { fBuff[aLen-1] = '\n'; fBuff[aLen]=0; @@ -348,7 +358,7 @@ unsigned int WEFileReadThread::readDataFile(messageqcpp::SBS& Sbs) aIdx++; if(fSdh.getDebugLvl()>2) cout << "File data line = " << aIdx <=sizeof(fBuff)-2) //Didn't hit delim; BIG ROW + else if(aLen>=fBuffSize-2) //Didn't hit delim; BIG ROW { cout <<"Bad Row data " << endl; cout << fBuff << endl; diff --git a/writeengine/splitter/we_filereadthread.h b/writeengine/splitter/we_filereadthread.h index 623184e8d..f9a486d5c 100644 --- a/writeengine/splitter/we_filereadthread.h +++ b/writeengine/splitter/we_filereadthread.h @@ -98,7 +98,7 @@ public: void add2InputDataFileList(std::string& FileName); private: - enum { MAXBUFFSIZE=1024*1024 }; + enum { 
DEFAULTBUFFSIZE=1024*1024 }; // don't allow anyone else to set void setTgtPmId(unsigned int fTgtPmId) { this->fTgtPmId = fTgtPmId; } @@ -120,7 +120,8 @@ private: char fEncl; // Encl char char fEsc; // Esc char char fDelim; // Column Delimit char - char fBuff[MAXBUFFSIZE]; // main data buffer + char* fBuff; // main data buffer + int fBuffSize; }; } /* namespace WriteEngine */ diff --git a/writeengine/splitter/we_sdhandler.cpp b/writeengine/splitter/we_sdhandler.cpp index 56ace80dc..78019b338 100644 --- a/writeengine/splitter/we_sdhandler.cpp +++ b/writeengine/splitter/we_sdhandler.cpp @@ -2301,6 +2301,13 @@ char WESDHandler::getEscChar() //------------------------------------------------------------------------------ +int WESDHandler::getReadBufSize() +{ + return fRef.fCmdArgs.getReadBufSize(); +} + +//------------------------------------------------------------------------------ + char WESDHandler::getDelimChar() { return fRef.fCmdArgs.getDelimChar(); diff --git a/writeengine/splitter/we_sdhandler.h b/writeengine/splitter/we_sdhandler.h index 2d4b20cc2..ff6bc829e 100644 --- a/writeengine/splitter/we_sdhandler.h +++ b/writeengine/splitter/we_sdhandler.h @@ -149,6 +149,7 @@ public: char getEscChar(); char getDelimChar(); bool getConsoleLog(); + int getReadBufSize(); ImportDataMode getImportDataMode() const; void sysLog(const logging::Message::Args& msgArgs, logging::LOG_TYPE logType, logging::Message::MessageID msgId); From fbf2f2e979499a1ccf2d892133ff7cda28f41bd1 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 10 May 2018 17:35:38 +0100 Subject: [PATCH 002/123] MCOL-1403 Remove whitespace trimming on constants This appears to be to fix equality matches in InfiniDB but at the same time it breaks LIKE processing. Equality matching with trailing whitespace was fixed in MCOL-1246 so the old InfiniDB patch can be removed. 
--- dbcon/joblist/jlf_execplantojoblist.cpp | 12 ------------ dbcon/mysql/ha_calpont_execplan.cpp | 4 ---- 2 files changed, 16 deletions(-) diff --git a/dbcon/joblist/jlf_execplantojoblist.cpp b/dbcon/joblist/jlf_execplantojoblist.cpp index b6242ba77..cfd694f9b 100644 --- a/dbcon/joblist/jlf_execplantojoblist.cpp +++ b/dbcon/joblist/jlf_execplantojoblist.cpp @@ -1500,10 +1500,7 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo) return doExpressionFilter(sf, jobInfo); } - // trim trailing space char in the predicate string constval(cc->constval()); - size_t spos = constval.find_last_not_of(" "); - if (spos != string::npos) constval = constval.substr(0, spos+1); CalpontSystemCatalog::OID dictOid = 0; CalpontSystemCatalog::ColType ct = sc->colType(); @@ -2569,10 +2566,7 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) if (ConstantColumn::NULLDATA == cc->type() && (opeq == *sop || opne == *sop)) cop = COMPARE_NIL; - // trim trailing space char string value = cc->constval(); - size_t spos = value.find_last_not_of(" "); - if (spos != string::npos) value = value.substr(0, spos+1); pds->addFilter(cop, value); } @@ -2652,10 +2646,7 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) if (ConstantColumn::NULLDATA == cc->type() && (opeq == *sop || opne == *sop)) cop = COMPARE_NIL; - // trim trailing space char string value = cc->constval(); - size_t spos = value.find_last_not_of(" "); - if (spos != string::npos) value = value.substr(0, spos+1); pds->addFilter(cop, value); } @@ -2759,9 +2750,6 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) int8_t cop = op2num(sop); int64_t value = 0; string constval = cc->constval(); - // trim trailing space char - size_t spos = constval.find_last_not_of(" "); - if (spos != string::npos) constval = constval.substr(0, spos+1); // @bug 1151 string longer than colwidth of char/varchar. 
uint8_t rf = 0; diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 9cc3a99c8..a30d71688 100755 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4099,7 +4099,6 @@ void gp_walk(const Item *item, void *arg) Item_string* isp = (Item_string*)item; if (isp) { - // @bug 3669. trim trailing spaces for the compare value if (isp->result_type() == STRING_RESULT) { String val, *str = isp->val_str(&val); @@ -4108,9 +4107,6 @@ void gp_walk(const Item *item, void *arg) { cval.assign(str->ptr(), str->length()); } - size_t spos = cval.find_last_not_of(" "); - if (spos != string::npos) - cval = cval.substr(0, spos+1); gwip->rcWorkStack.push(new ConstantColumn(cval)); break; } From baf42e7b4a83ce88e89ed7d551f2286e2016de1b Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 10 May 2018 18:32:22 +0100 Subject: [PATCH 003/123] MCOL-1390 Fix SUBSTRING_INDEX for negative count If negative count number is more than the number of characters in the string then it should always return the string. For example if a table contains SUBSTRING_INDEX('zzz', 'z', -5) should return 'zzz'. Before this patch it would return NULL. --- utils/funcexp/func_substring_index.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/funcexp/func_substring_index.cpp b/utils/funcexp/func_substring_index.cpp index e3ec80b5f..a4b9fb9a6 100644 --- a/utils/funcexp/func_substring_index.cpp +++ b/utils/funcexp/func_substring_index.cpp @@ -71,6 +71,9 @@ std::string Func_substring_index::getStrVal(rowgroup::Row& row, if ( count > (int64_t) end ) return str; + if (( count < 0 ) && ((count * -1) > end)) + return str; + string value = str; if ( count > 0 ) { From 82e8ab7518ea869e37cc1d724ce570164316a6ba Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:50:10 -0500 Subject: [PATCH 004/123] MCOL-1201 manual rebase with develop. 
Obsoletes branch MCOL-1201 --- dbcon/execplan/aggregatecolumn.cpp | 96 +-- dbcon/execplan/aggregatecolumn.h | 44 +- dbcon/joblist/expressionstep.cpp | 12 +- dbcon/joblist/expressionstep.h | 1 + dbcon/joblist/groupconcat.cpp | 2 +- dbcon/joblist/joblistfactory.cpp | 531 ++++++++---- dbcon/joblist/tupleaggregatestep.cpp | 280 +++++-- dbcon/mysql/ha_calpont_execplan.cpp | 858 +++++++++++--------- dbcon/mysql/ha_calpont_impl.cpp | 8 +- dbcon/mysql/ha_window_function.cpp | 37 +- utils/common/any.hpp | 270 +++--- utils/rowgroup/rowaggregation.cpp | 605 +++++++++----- utils/rowgroup/rowaggregation.h | 29 +- utils/udfsdk/CMakeLists.txt | 2 +- utils/udfsdk/allnull.cpp | 7 +- utils/udfsdk/allnull.h | 4 +- utils/udfsdk/avg_mode.cpp | 14 +- utils/udfsdk/avg_mode.h | 14 +- utils/udfsdk/mcsv1_udaf.cpp | 13 +- utils/udfsdk/mcsv1_udaf.h | 88 +- utils/udfsdk/median.cpp | 14 +- utils/udfsdk/median.h | 8 +- utils/udfsdk/ssq.cpp | 14 +- utils/udfsdk/ssq.h | 8 +- utils/udfsdk/udfmysql.cpp | 162 ++++ utils/udfsdk/udfsdk.vpj | 4 + utils/windowfunction/wf_udaf.cpp | 280 +++++-- utils/windowfunction/wf_udaf.h | 27 +- utils/windowfunction/windowfunctiontype.cpp | 8 +- writeengine/wrapper/writeengine.cpp | 10 +- 30 files changed, 2255 insertions(+), 1195 deletions(-) diff --git a/dbcon/execplan/aggregatecolumn.cpp b/dbcon/execplan/aggregatecolumn.cpp index 18cba2607..5bce12d79 100644 --- a/dbcon/execplan/aggregatecolumn.cpp +++ b/dbcon/execplan/aggregatecolumn.cpp @@ -98,36 +98,6 @@ AggregateColumn::AggregateColumn(const uint32_t sessionID): { } -AggregateColumn::AggregateColumn(const AggOp aggOp, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - -AggregateColumn::AggregateColumn(const AggOp aggOp, const string& content, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + content + ")") -{ - // 
TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); -} - -// deprecated constructor. use function name as string -AggregateColumn::AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fFunctionName(functionName), - fAggOp(NOOP), - fAsc(false), - fData(functionName + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - // deprecated constructor. use function name as string AggregateColumn::AggregateColumn(const string& functionName, const string& content, const uint32_t sessionID): ReturnedColumn(sessionID), @@ -137,20 +107,21 @@ AggregateColumn::AggregateColumn(const string& functionName, const string& conte fData(functionName + "(" + content + ")") { // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); + SRCP srcp(new ArithmeticColumn(content)); + fAggParms.push_back(srcp); } AggregateColumn::AggregateColumn( const AggregateColumn& rhs, const uint32_t sessionID ): ReturnedColumn(rhs, sessionID), fFunctionName (rhs.fFunctionName), fAggOp(rhs.fAggOp), - fFunctionParms(rhs.fFunctionParms), fTableAlias(rhs.tableAlias()), fAsc(rhs.asc()), fData(rhs.data()), fConstCol(rhs.fConstCol) { fAlias = rhs.alias(); + fAggParms = rhs.fAggParms; } /** @@ -166,10 +137,14 @@ const string AggregateColumn::toString() const if (fAlias.length() > 0) output << "/Alias: " << fAlias << endl; - if (fFunctionParms == 0) - output << "No arguments" << endl; + if (fAggParms.size() == 0) + output << "No arguments"; else - output << *fFunctionParms << endl; + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + output << *(fAggParms[i]) << " "; + } + output << endl; if (fConstCol) output << *fConstCol; @@ -191,10 +166,11 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const b << fFunctionName; b << static_cast(fAggOp); - if (fFunctionParms == 0) - b << (uint8_t) ObjectReader::NULL_CLASS; - else - fFunctionParms->serialize(b); + 
b << static_cast(fAggParms.size()); + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + fAggParms[i]->serialize(b); + } b << static_cast(fGroupByColList.size()); @@ -219,20 +195,26 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const void AggregateColumn::unserialize(messageqcpp::ByteStream& b) { - ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); - fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); - fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); - ReturnedColumn::unserialize(b); - b >> fFunctionName; - b >> fAggOp; - //delete fFunctionParms; - fFunctionParms.reset( - dynamic_cast(ObjectReader::createTreeNode(b))); - messageqcpp::ByteStream::quadbyte size; messageqcpp::ByteStream::quadbyte i; ReturnedColumn* rc; + ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); + fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); + fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); + fAggParms.erase(fAggParms.begin(), fAggParms.end()); + ReturnedColumn::unserialize(b); + b >> fFunctionName; + b >> fAggOp; + + b >> size; + for (i = 0; i < size; i++) + { + rc = dynamic_cast(ObjectReader::createTreeNode(b)); + SRCP srcp(rc); + fAggParms.push_back(srcp); + } + b >> size; for (i = 0; i < size; i++) @@ -261,6 +243,7 @@ void AggregateColumn::unserialize(messageqcpp::ByteStream& b) bool AggregateColumn::operator==(const AggregateColumn& t) const { const ReturnedColumn* rc1, *rc2; + AggParms::const_iterator it, it2; rc1 = static_cast(this); rc2 = static_cast(&t); @@ -277,16 +260,18 @@ bool AggregateColumn::operator==(const AggregateColumn& t) const if (fAggOp != t.fAggOp) return false; - if (fFunctionParms.get() != NULL && t.fFunctionParms.get() != NULL) + if (aggParms().size() != t.aggParms().size()) { - if (*fFunctionParms.get() != t.fFunctionParms.get()) + return false; + } + for (it = fAggParms.begin(), it2 = t.fAggParms.begin(); + it != fAggParms.end(); + ++it, 
++it2) + { + if (**it != **it2) return false; } - else if (fFunctionParms.get() != NULL || t.fFunctionParms.get() != NULL) - return false; - //if (fAlias != t.fAlias) - // return false; if (fTableAlias != t.fTableAlias) return false; @@ -645,3 +630,4 @@ AggregateColumn::AggOp AggregateColumn::agname2num(const string& agname) } } // namespace execplan + diff --git a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h index d1db7e5a4..b0884f179 100644 --- a/dbcon/execplan/aggregatecolumn.h +++ b/dbcon/execplan/aggregatecolumn.h @@ -40,6 +40,8 @@ class ByteStream; namespace execplan { +typedef std::vector AggParms; + /** * @brief A class to represent a aggregate return column * @@ -74,7 +76,8 @@ public: BIT_OR, BIT_XOR, GROUP_CONCAT, - UDAF + UDAF, + MULTI_PARM }; /** @@ -94,21 +97,6 @@ public: */ AggregateColumn(const uint32_t sessionID); - /** - * ctor - */ - AggregateColumn(const AggOp aggop, ReturnedColumn* parm, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const AggOp aggop, const std::string& content, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID = 0); - /** * ctor */ @@ -155,24 +143,27 @@ public: fAggOp = aggOp; } + /** get function parms - * - * set the function parms from this object */ - virtual const SRCP functionParms() const + virtual AggParms& aggParms() { - return fFunctionParms; + return fAggParms; + } + + virtual const AggParms& aggParms() const + { + return fAggParms; } /** set function parms - * - * set the function parms for this object */ - virtual void functionParms(const SRCP& functionParms) + virtual void aggParms(const AggParms& parms) { - fFunctionParms = functionParms; + fAggParms = parms; } + /** return a copy of this pointer * * deep copy of this pointer and return the copy @@ -325,9 +316,10 @@ protected: uint8_t fAggOp; /** - * A ReturnedColumn objects that are the arguments to this function + 
* ReturnedColumn objects that are the arguments to this + * function */ - SRCP fFunctionParms; + AggParms fAggParms; /** table alias * A string to represent table alias name which contains this column diff --git a/dbcon/joblist/expressionstep.cpp b/dbcon/joblist/expressionstep.cpp index 0e064c359..4a8a14ff3 100644 --- a/dbcon/joblist/expressionstep.cpp +++ b/dbcon/joblist/expressionstep.cpp @@ -56,6 +56,17 @@ using namespace rowgroup; namespace joblist { +ExpressionStep::ExpressionStep() : + fExpressionFilter(NULL), + fExpressionId(-1), + fVarBinOK(false), + fSelectFilter(false), + fAssociatedJoinId(0), + fDoJoin(false), + fVirtual(false) +{ +} + ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : JobStep(jobInfo), fExpressionFilter(NULL), @@ -68,7 +79,6 @@ ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : { } - ExpressionStep::ExpressionStep(const ExpressionStep& rhs) : JobStep(rhs), fExpression(rhs.expression()), diff --git a/dbcon/joblist/expressionstep.h b/dbcon/joblist/expressionstep.h index 4a069440f..63423fc7d 100644 --- a/dbcon/joblist/expressionstep.h +++ b/dbcon/joblist/expressionstep.h @@ -50,6 +50,7 @@ class ExpressionStep : public JobStep { public: // constructors + ExpressionStep(); ExpressionStep(const JobInfo&); // destructor constructors virtual ~ExpressionStep(); diff --git a/dbcon/joblist/groupconcat.cpp b/dbcon/joblist/groupconcat.cpp index 234fc0a8e..afc91a2ec 100644 --- a/dbcon/joblist/groupconcat.cpp +++ b/dbcon/joblist/groupconcat.cpp @@ -78,7 +78,7 @@ void GroupConcatInfo::prepGroupConcat(JobInfo& jobInfo) while (i != jobInfo.groupConcatCols.end()) { GroupConcatColumn* gcc = dynamic_cast(i->get()); - const RowColumn* rcp = dynamic_cast(gcc->functionParms().get()); + const RowColumn* rcp = dynamic_cast(gcc->aggParms()[0].get()); SP_GroupConcat groupConcat(new GroupConcat); groupConcat->fSeparator = gcc->separator(); diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index a48ecd13a..4cf7bccc5 100644 
--- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -18,7 +18,6 @@ // $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $ - #include #include #include @@ -870,7 +869,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo if (gcc != NULL) { - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rcp = dynamic_cast(srcp.get()); const vector& cols = rcp->columnVec(); @@ -891,21 +890,55 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } +#if 0 + // MCOL-1201 Add support for multi-parameter UDAnF + UDAFColumn* udafc = dynamic_cast(retCols[i].get()); + if (udafc != NULL) + { + srcp = udafc->aggParms()[0]; + const RowColumn* rcp = dynamic_cast(srcp.get()); + const vector& cols = rcp->columnVec(); + for (vector::const_iterator j = cols.begin(); j != cols.end(); j++) + { + srcp = *j; + if (dynamic_cast(srcp.get()) == NULL) + retCols.push_back(srcp); + + // Do we need this? + const ArithmeticColumn* ac = dynamic_cast(srcp.get()); + const FunctionColumn* fc = dynamic_cast(srcp.get()); + if (ac != NULL || fc != NULL) + { + // bug 3728, make a dummy expression step for each expression. + scoped_ptr es(new ExpressionStep(jobInfo)); + es->expression(srcp, jobInfo); + } + } + continue; + } +#endif srcp = retCols[i]; const AggregateColumn* ag = dynamic_cast(retCols[i].get()); - - if (ag != NULL) - srcp = ag->functionParms(); - - const ArithmeticColumn* ac = dynamic_cast(srcp.get()); - const FunctionColumn* fc = dynamic_cast(srcp.get()); - - if (ac != NULL || fc != NULL) + // bug 3728 Make a dummy expression for srcp if it is an + // expression. This is needed to fill in some stuff. + // Note that es.expression does nothing if the item is not an expression. + if (ag == NULL) { - // bug 3728, make a dummy expression step for each expression. - scoped_ptr es(new ExpressionStep(jobInfo)); - es->expression(srcp, jobInfo); + // Not an aggregate. 
Make a dummy expression for the item + ExpressionStep es; + es.expression(srcp, jobInfo); + } + else + { + // MCOL-1201 multi-argument aggregate. make a dummy expression + // step for each argument that is an expression. + for (uint32_t i = 0; i < ag->aggParms().size(); ++i) + { + srcp = ag->aggParms()[i]; + ExpressionStep es; + es.expression(srcp, jobInfo); + } } } @@ -915,17 +948,18 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo { srcp = retCols[i]; const SimpleColumn* sc = dynamic_cast(srcp.get()); + AggregateColumn* aggc = dynamic_cast(srcp.get()); bool doDistinct = (csep->distinct() && csep->groupByCols().empty()); uint32_t tupleKey = -1; string alias; string view; - // returned column could be groupby column, a simplecoulumn not a agregatecolumn + // returned column could be groupby column, a simplecoulumn not an aggregatecolumn int op = 0; CalpontSystemCatalog::OID dictOid = 0; CalpontSystemCatalog::ColType ct, aggCt; - if (sc == NULL) + if (aggc) { GroupConcatColumn* gcc = dynamic_cast(retCols[i].get()); @@ -939,7 +973,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo tupleKey = ti.key; jobInfo.returnedColVec.push_back(make_pair(tupleKey, gcc->aggOp())); // not a tokenOnly column. 
Mark all the columns involved - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rowCol = dynamic_cast(srcp.get()); if (rowCol) @@ -963,186 +997,353 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } - - AggregateColumn* ac = dynamic_cast(retCols[i].get()); - - if (ac != NULL) + else { - srcp = ac->functionParms(); - sc = dynamic_cast(srcp.get()); + // Aggregate column not group concat + AggParms& aggParms = aggc->aggParms(); - if (ac->constCol().get() != NULL) + for (uint32_t parm = 0; parm < aggParms.size(); ++parm) { - // replace the aggregate on constant with a count(*) - SRCP clone; - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) + if (aggc->constCol().get() != NULL) { - clone.reset(new UDAFColumn(*udafc, ac->sessionID())); + // replace the aggregate on constant with a count(*) + SRCP clone; + UDAFColumn* udafc = dynamic_cast(aggc); + + if (udafc) + { + clone.reset(new UDAFColumn(*udafc, aggc->sessionID())); + } + else + { + clone.reset(new AggregateColumn(*aggc, aggc->sessionID())); + } + + jobInfo.constAggregate.insert(make_pair(i, clone)); + aggc->aggOp(AggregateColumn::COUNT_ASTERISK); + aggc->distinct(false); + } + + srcp = aggParms[parm]; + sc = dynamic_cast(srcp.get()); + if (parm == 0) + { + op = aggc->aggOp(); } else { - clone.reset(new AggregateColumn(*ac, ac->sessionID())); + op = AggregateColumn::MULTI_PARM; + } + doDistinct = aggc->distinct(); + if (aggParms.size() == 1) + { + // Set the col type based on the single parm. + // Changing col type based on a parm if multiple parms + // doesn't really make sense. + updateAggregateColType(aggc, srcp, op, jobInfo); + } + aggCt = aggc->resultType(); + + // As of bug3695, make sure varbinary is not used in aggregation. 
+ // TODO: allow for UDAF + if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) + throw runtime_error ("VARBINARY in aggregate function is not supported."); + + // Project the parm columns or expressions + if (sc != NULL) + { + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. + if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. 
+ else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } + } + else + { + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - jobInfo.constAggregate.insert(make_pair(i, clone)); - ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ac->distinct(false); - } + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - op = ac->aggOp(); - doDistinct = ac->distinct(); - updateAggregateColType(ac, srcp, op, jobInfo); - aggCt = ac->resultType(); + if (keyIt == projectKeys.end()) + { + 
RetColsVector::iterator it = pcv.end(); - // As of bug3695, make sure varbinary is not used in aggregation. - if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) - throw runtime_error ("VARBINARY in aggregate function is not supported."); - } - } + if (doDistinct) + it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // simple column selected or aggregated - if (sc != NULL) - { - // one column only need project once - CalpontSystemCatalog::OID retOid = sc->oid(); - CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); - alias = extractTableAlias(sc); - view = sc->viewName(); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); - if (!sc->schemaName().empty()) - { - ct = sc->colType(); + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } -//XXX use this before connector sets colType in sc correctly. 
- if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) - ct = jobInfo.csc->colType(sc->oid()); + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; -//X - dictOid = isDictCol(ct); - } - else - { - retOid = (tblOid + 1) + sc->colPosition(); - ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; - } + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); - TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); - tupleKey = ti.key; + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; - // this is a string column - if (dictOid > 0) - { - map::iterator findit = jobInfo.tokenOnly.find(tupleKey); - - // if the column has never seen, and the op is count: possible need count only. - if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) - { - if (findit == jobInfo.tokenOnly.end()) - jobInfo.tokenOnly[tupleKey] = true; - } - // if aggregate other than count, token is not enough. 
- else if (op != 0 || doDistinct) - { - jobInfo.tokenOnly[tupleKey] = false; - } - - findit = jobInfo.tokenOnly.find(tupleKey); - - if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) - { - dictMap[tupleKey] = dictOid; - jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; - ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); - jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } } else { - const ArithmeticColumn* ac = NULL; - const FunctionColumn* fc = NULL; - const WindowFunctionColumn* wc = NULL; - bool hasAggCols = false; - - if ((ac = dynamic_cast(srcp.get())) != NULL) + // Not an Aggregate + // simple column selected + if (sc != NULL) { - if (ac->aggColumnList().size() > 0) - hasAggCols = true; + // one column only need project once + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. 
+ if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. + else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } } - else if ((fc = dynamic_cast(srcp.get())) != NULL) - { - if (fc->aggColumnList().size() > 0) - hasAggCols = true; - } - else if (dynamic_cast(srcp.get()) != NULL) - { - std::ostringstream errmsg; - errmsg << "Invalid aggregate function nesting."; - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - else if ((wc = dynamic_cast(srcp.get())) == NULL) - { - std::ostringstream errmsg; - errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - - uint64_t eid = srcp.get()->expressionId(); - ct = srcp.get()->resultType(); - TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); - tupleKey = ti.key; - - if (hasAggCols) - jobInfo.expressionVec.push_back(tupleKey); - } - - // add to project list - vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - - if (keyIt == projectKeys.end()) - { - RetColsVector::iterator it = pcv.end(); - - if (doDistinct) - it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); else - it = pcv.insert(pcv.end(), srcp); - - projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); - } - else if (doDistinct) // @bug4250, move forward distinct column if necessary. 
- { - uint32_t pos = distance(projectKeys.begin(), keyIt); - - if (pos >= lastGroupByPos) { - pcv[pos] = pcv[lastGroupByPos]; - pcv[lastGroupByPos] = srcp; - projectKeys[pos] = projectKeys[lastGroupByPos]; - projectKeys[lastGroupByPos] = tupleKey; - lastGroupByPos++; + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - } - if (doDistinct && dictOid > 0) - tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - // remember the columns to be returned - jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + if (keyIt == projectKeys.end()) + { + RetColsVector::iterator it = pcv.end(); - if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) - jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + if (doDistinct) + it = pcv.insert(pcv.begin() + 
lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // bug 1499 distinct processing, save unique distinct columns - if (doDistinct && - (jobInfo.distinctColVec.end() == - find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) - { - jobInfo.distinctColVec.push_back(tupleKey); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); + + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } + + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 3dbd01311..21c7c0af6 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -164,6 +164,9 @@ inline RowAggFunctionType functionIdMap(int planFuncId) case AggregateColumn::UDAF: return ROWAGG_UDAF; + case AggregateColumn::MULTI_PARM: + return ROWAGG_MULTI_PARM; + default: return ROWAGG_FUNCT_UNDEFINE; } @@ -1302,7 +1305,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no/not enough 
UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -1468,7 +1471,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); @@ -1483,6 +1486,17 @@ void TupleAggregateStep::prep1PhaseAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(key); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(width[colProj]); + } + break; + default: { ostringstream emsg; @@ -1560,7 +1574,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(3)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec[i]->fAuxColumnIndex = lastCol++; @@ -1675,7 +1689,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation map projColPosMap; @@ -1848,7 +1862,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); 
} } else @@ -2043,7 +2057,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -2065,6 +2079,18 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(aggKey); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(widthProj[colProj]); + ++colAgg; + } + break; + default: { ostringstream emsg; @@ -2111,7 +2137,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( groupByNoDist.push_back(groupby); aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - + + projColsUDAFIndex = 0; // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2121,6 +2148,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colAgg = -1; + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -2432,11 +2467,37 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + // update the aggregate function vector + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = 
jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + } if (aggOp == ROWAGG_COUNT_NO_OP) funct->fAuxColumnIndex = colAgg; @@ -2549,7 +2610,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep1PhaseDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec2[i]->fAuxColumnIndex = lastCol++; @@ -2893,7 +2954,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3036,12 +3097,11 @@ void TupleAggregateStep::prep2PhasesAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); break; } - } if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -3240,7 +3300,7 @@ void 
TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } oidsAggPm.push_back(oidsProj[colProj]); @@ -3261,6 +3321,18 @@ void TupleAggregateStep::prep2PhasesAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -3278,11 +3350,16 @@ void TupleAggregateStep::prep2PhasesAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap; AGG_MAP aggDupFuncMap; + projColsUDAFIndex = 0; // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3299,7 +3376,14 @@ void TupleAggregateStep::prep2PhasesAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colPm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } // Is this a UDAF? use the function as part of the key. 
+ mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; if (aggOp == ROWAGG_UDAF) @@ -3452,20 +3536,36 @@ void TupleAggregateStep::prep2PhasesAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3517,7 +3617,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3545,7 +3645,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -3691,6 +3791,7 
@@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector groupByPm, groupByUm, groupByNoDist; vector functionVecPm, functionNoDistVec, functionVecUm; + list multiParmIndexes; uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; @@ -3702,7 +3803,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3856,7 +3957,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -4050,7 +4151,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -4072,6 +4173,19 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + multiParmIndexes.push_back(colAggPm); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -4093,12 +4207,23 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( groupByUm.push_back(groupby); } + // Keep a count of the parms 
after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) + { SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; // UDAF support + if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) + { + // Multi-Parm is not used on the UM + ++multiParms; + continue; + } if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); @@ -4106,7 +4231,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } else @@ -4116,18 +4241,25 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fStatsFunction, funcPm->fOutputColumnIndex, funcPm->fOutputColumnIndex, - funcPm->fAuxColumnIndex)); + funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } } - posAggUm = posAggPm; - oidsAggUm = oidsAggPm; - keysAggUm = keysAggPm; - scaleAggUm = scaleAggPm; - precisionAggUm = precisionAggPm; - widthAggUm = widthAggPm; - typeAggUm = typeAggPm; + // Copy over the PM arrays to the UM. Skip any that are a multi-parm entry. 
+ for (uint32_t idx = 0; idx < oidsAggPm.size(); ++idx) + { + if (find (multiParmIndexes.begin(), multiParmIndexes.end(), idx ) != multiParmIndexes.end()) + { + continue; + } + oidsAggUm.push_back(oidsAggPm[idx]); + keysAggUm.push_back(keysAggPm[idx]); + scaleAggUm.push_back(scaleAggPm[idx]); + precisionAggUm.push_back(precisionAggPm[idx]); + widthAggUm.push_back(widthAggPm[idx]); + typeAggUm.push_back(typeAggPm[idx]); + } } @@ -4137,6 +4269,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4159,6 +4295,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colUm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } + + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -4285,7 +4436,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second; + colUm = it->second - multiParms; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); @@ -4309,7 +4460,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - 
colUm = it->second; + colUm = it->second - multiParms; if (aggOp == ROWAGG_SUM) { @@ -4412,21 +4563,36 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - pUDAFFunc = udafc->getContext().getFunction(); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4480,7 +4646,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4540,7 +4706,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called 
but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -4687,6 +4853,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -4694,6 +4865,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[i].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } if (returnedColVec[k].first != distinctColKey) continue; @@ -4715,7 +4891,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -4732,9 +4908,15 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } // search non-distinct functions in functionVec vector::iterator it = functionVecUm.begin(); @@ -4752,7 +4934,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK || @@ -4773,7 +4955,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( 
f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 02fa4d8a4..86dc0bd2f 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4035,6 +4035,10 @@ ParseTree* buildParseTree(Item_func* item, gp_walk_info& gwi, bool& nonSupport) ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { + // MCOL-1201 For UDAnF multiple parameters + vector selCols; + vector orderCols; + if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -4051,6 +4055,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // N.B. argument_count() is the # of formal parms to the agg fcn. InifniDB only supports 1 argument // TODO: Support more than one parm +#if 0 if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC && isp->sum_func() != Item_sum::UDF_SUM_FUNC) { @@ -4058,7 +4063,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG); return NULL; } - +#endif AggregateColumn* ac = NULL; if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) @@ -4081,444 +4086,509 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { gwi.fatalParseError = true; gwi.parseErrorText = "Non supported aggregate type on the select clause"; + if (ac) + delete ac; return NULL; } - // special parsing for group_concat - if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + try { - Item_func_group_concat* gc = (Item_func_group_concat*)isp; + + // special parsing for group_concat + if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + Item_func_group_concat* gc = (Item_func_group_concat*)isp; vector orderCols; - RowColumn* rowCol = new RowColumn(); + 
RowColumn* rowCol = new RowColumn(); vector selCols; - uint32_t select_ctn = gc->count_field(); - ReturnedColumn* rc = NULL; + uint32_t select_ctn = gc->count_field(); + ReturnedColumn* rc = NULL; - for (uint32_t i = 0; i < select_ctn; i++) - { - rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); - - if (!rc || gwi.fatalParseError) - return NULL; - - selCols.push_back(SRCP(rc)); - } - - ORDER** order_item, **end; - - for (order_item = gc->get_order(), - end = order_item + gc->order_field(); order_item < end; - order_item++) - { - Item* ord_col = *(*order_item)->item; - - if (ord_col->type() == Item::INT_ITEM) + for (uint32_t i = 0; i < select_ctn; i++) { - Item_int* id = (Item_int*)ord_col; - - if (id->val_int() > (int)selCols.size()) - { - gwi.fatalParseError = true; - return NULL; - } - - rc = selCols[id->val_int() - 1]->clone(); - rc->orderPos(id->val_int() - 1); - } - else - { - rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); + rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); if (!rc || gwi.fatalParseError) { + if (ac) + delete ac; return NULL; } + + selCols.push_back(SRCP(rc)); } - // 10.2 TODO: direction is now a tri-state flag - rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? 
true : false); - orderCols.push_back(SRCP(rc)); - } + ORDER** order_item, **end; - rowCol->columnVec(selCols); - (dynamic_cast(ac))->orderCols(orderCols); - parm.reset(rowCol); - - if (gc->str_separator()) - { - string separator; - separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); - (dynamic_cast(ac))->separator(separator); - } - } - else - { - for (uint32_t i = 0; i < isp->argument_count(); i++) - { - Item* sfitemp = sfitempp[i]; - Item::Type sfitype = sfitemp->type(); - - switch (sfitype) + for (order_item = gc->get_order(), + end = order_item + gc->order_field(); order_item < end; + order_item++) { - case Item::FIELD_ITEM: - { - Item_field* ifp = reinterpret_cast(sfitemp); - SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + Item* ord_col = *(*order_item)->item; - if (!sc) + if (ord_col->type() == Item::INT_ITEM) + { + Item_int* id = (Item_int*)ord_col; + + if (id->val_int() > (int)selCols.size()) { gwi.fatalParseError = true; - break; + if (ac) + delete ac; + return NULL; } - parm.reset(sc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); - TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); - gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); - break; + rc = selCols[id->val_int() - 1]->clone(); + rc->orderPos(id->val_int() - 1); } - - case Item::INT_ITEM: - case Item::STRING_ITEM: - case Item::REAL_ITEM: - case Item::DECIMAL_ITEM: + else { - // treat as count(*) - if (ac->aggOp() == AggregateColumn::COUNT) - ac->aggOp(AggregateColumn::COUNT_ASTERISK); + rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::NULL_ITEM: - { - //ac->aggOp(AggregateColumn::COUNT); - parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); - //ac->functionParms(parm); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::FUNC_ITEM: - { - Item_func* ifp = (Item_func*)sfitemp; - ReturnedColumn* rc = 0; - - // check count(1+1) case - vector tmpVec; - uint16_t parseInfo = 0; - parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); - - if (parseInfo & SUB_BIT) + if (!rc || gwi.fatalParseError) { - gwi.fatalParseError = true; - break; - } - else if (!gwi.fatalParseError && - !(parseInfo & AGG_BIT) && - !(parseInfo & AF_BIT) && - tmpVec.size() == 0) - { - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - FunctionColumn* fc = dynamic_cast(rc); - - if ((fc && fc->functionParms().empty()) || !fc) - { - //ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (dynamic_cast(rc)) - { - //@bug5229. handle constant function on aggregate argument - ac->constCol(SRCP(rc)); - break; - } - } - } - - // MySQL carelessly allows correlated aggregate function on the WHERE clause. - // Here is the work around to deal with that inconsistence. 
- // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; - ClauseType clauseType = gwi.clauseType; - - if (gwi.clauseType == WHERE) - gwi.clauseType = HAVING; - - // @bug 3603. for cases like max(rand()). try to build function first. - if (!rc) - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - - parm.reset(rc); - gwi.clauseType = clauseType; - - if (gwi.fatalParseError) - break; - - //ac->functionParms(parm); - break; - } - - case Item::REF_ITEM: - { - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (rc) - { - parm.reset(rc); - //ac->functionParms(parm); - break; + if (ac) + delete ac; + return NULL; } } - default: - { - gwi.fatalParseError = true; - //gwi.parseErrorText = "Non-supported Item in Aggregate function"; - } + // 10.2 TODO: direction is now a tri-state flag + rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); + orderCols.push_back(SRCP(rc)); } - if (gwi.fatalParseError) + rowCol->columnVec(selCols); + (dynamic_cast(ac))->orderCols(orderCols); + parm.reset(rowCol); + + if (gc->str_separator()) { - if (gwi.parseErrorText.empty()) - { - Message::Args args; - - if (item->name) - args.add(item->name); - else - args.add(""); - - gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); - } - - return NULL; + string separator; + separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); + (dynamic_cast(ac))->separator(separator); } } - } - - if (parm) - { - ac->functionParms(parm); - - if (isp->sum_func() == Item_sum::AVG_FUNC || - isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: 
- case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::DECIMAL; - ct.colWidth = 8; - ct.scale += 4; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; -#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::COUNT_FUNC || - isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = parm->resultType().scale; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_FUNC || - isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - ct.colDataType = CalpontSystemCatalog::BIGINT; - - // no break, let fall through - - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - ct.colWidth = 8; - break; - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::UBIGINT; - ct.colWidth = 8; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; 
-#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::STD_FUNC || - isp->sum_func() == Item_sum::VARIANCE_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - ct.scale = 0; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = 0; - ct.precision = -16; // borrowed to indicate skip null value check on connector - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - //Item_func_group_concat* gc = (Item_func_group_concat*)isp; - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::VARCHAR; - ct.colWidth = isp->max_length; - ct.precision = 0; - ac->resultType(ct); - } else { - ac->resultType(parm->resultType()); + for (uint32_t i = 0; i < isp->argument_count(); i++) + { + Item* sfitemp = sfitempp[i]; + Item::Type sfitype = sfitemp->type(); + + switch (sfitype) + { + case Item::FIELD_ITEM: + { + Item_field* ifp = reinterpret_cast(sfitemp); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + + if (!sc) + { + gwi.fatalParseError = true; + break; + } + + parm.reset(sc); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); + TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); + gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); + break; + } + + case Item::INT_ITEM: + case Item::STRING_ITEM: + case Item::REAL_ITEM: + case Item::DECIMAL_ITEM: + { + // treat as count(*) + if (ac->aggOp() == AggregateColumn::COUNT) + ac->aggOp(AggregateColumn::COUNT_ASTERISK); + + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::NULL_ITEM: + { + parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::FUNC_ITEM: + { + Item_func* ifp = (Item_func*)sfitemp; + ReturnedColumn* rc = 0; + + // check count(1+1) case + vector tmpVec; + uint16_t parseInfo = 0; + parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); + + if (parseInfo & SUB_BIT) + { + gwi.fatalParseError = true; + break; + } + else if (!gwi.fatalParseError && + !(parseInfo & AGG_BIT) && + !(parseInfo & AF_BIT) && + tmpVec.size() == 0) + { + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + FunctionColumn* fc = dynamic_cast(rc); + + if ((fc && fc->functionParms().empty()) || !fc) + { + //ac->aggOp(AggregateColumn::COUNT_ASTERISK); + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (dynamic_cast(rc)) + { + //@bug5229. handle constant function on aggregate argument + ac->constCol(SRCP(rc)); + break; + } + } + } + + // MySQL carelessly allows correlated aggregate function on the WHERE clause. + // Here is the work around to deal with that inconsistence. + // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; + ClauseType clauseType = gwi.clauseType; + + if (gwi.clauseType == WHERE) + gwi.clauseType = HAVING; + + // @bug 3603. for cases like max(rand()). try to build function first. 
+ if (!rc) + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + + parm.reset(rc); + gwi.clauseType = clauseType; + + if (gwi.fatalParseError) + break; + + break; + } + + case Item::REF_ITEM: + { + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (rc) + { + parm.reset(rc); + break; + } + } + + default: + { + gwi.fatalParseError = true; + //gwi.parseErrorText = "Non-supported Item in Aggregate function"; + } + } + + if (gwi.fatalParseError) + { + if (gwi.parseErrorText.empty()) + { + Message::Args args; + + if (item->name) + args.add(item->name); + else + args.add(""); + + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); + } + + if (ac) + delete ac; + return NULL; + } + if (parm) + { + // MCOL-1201 multi-argument aggregate + ac->aggParms().push_back(parm); + } + } } - } - else - { - ac->resultType(colType_MysqlToIDB(isp)); - } - // adjust decimal result type according to internalDecimalScale - if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) - { - CalpontSystemCatalog::ColType ct = ac->resultType(); - ct.scale = gwi.internalDecimalScale; - ac->resultType(ct); - } - - // check for same aggregate on the select list - ac->expressionId(ci->expressionId++); - - if (gwi.clauseType != SELECT) - { - for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + // Get result type + // Modified for MCOL-1201 multi-argument aggregate + if (ac->aggParms().size() > 0) { - if (*ac == gwi.returnedCols[i].get()) - ac->expressionId(gwi.returnedCols[i]->expressionId()); - } - } + // These are all one parm functions, so we can safely + // use the first parm for result type. + parm = ac->aggParms()[0]; + if (isp->sum_func() == Item_sum::AVG_FUNC || + isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); - // @bug5977 @note Temporary fix to avoid mysqld crash. 
The permanent fix will - // be applied in ExeMgr. When the ExeMgr fix is available, this checking - // will be taken out. - if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::DECIMAL; + ct.colWidth = 8; + ct.scale += 4; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::COUNT_FUNC || + isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = parm->resultType().scale; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_FUNC || + isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + ct.colDataType = CalpontSystemCatalog::BIGINT; + + // no break, let fall through + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + ct.colWidth = 8; + break; + + case CalpontSystemCatalog::UTINYINT: + case 
CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::UBIGINT; + ct.colWidth = 8; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::STD_FUNC || + isp->sum_func() == Item_sum::VARIANCE_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + ct.scale = 0; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = 0; + ct.precision = -16; // borrowed to indicate skip null value check on connector + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + //Item_func_group_concat* gc = (Item_func_group_concat*)isp; + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::VARCHAR; + ct.colWidth = isp->max_length; + ct.precision = 0; + ac->resultType(ct); + } + else + { + // UDAF result type will be set below. 
+ ac->resultType(parm->resultType()); + } + } + else + { + ac->resultType(colType_MysqlToIDB(isp)); + } + + // adjust decimal result type according to internalDecimalScale + if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) + { + CalpontSystemCatalog::ColType ct = ac->resultType(); + ct.scale = gwi.internalDecimalScale; + ac->resultType(ct); + } + + // check for same aggregate on the select list + ac->expressionId(ci->expressionId++); + + if (gwi.clauseType != SELECT) + { + for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + { + if (*ac == gwi.returnedCols[i].get()) + ac->expressionId(gwi.returnedCols[i]->expressionId()); + } + } + + // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will + // be applied in ExeMgr. When the ExeMgr fix is available, this checking + // will be taken out. + if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "No project column found for aggregate function"; + if (ac) + delete ac; + return NULL; + } + else if (ac->constCol()) + { + gwi.count_asterisk_list.push_back(ac); + } + + // For UDAF, populate the context and call the UDAF init() function. + // The return type is (should be) set in context by init(). + if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + { + UDAFColumn* udafc = dynamic_cast(ac); + + if (udafc) + { + mcsv1Context& context = udafc->getContext(); + context.setName(isp->func_name()); + + // Set up the return type defaults for the call to init() + context.setResultType(udafc->resultType().colDataType); + context.setColWidth(udafc->resultType().colWidth); + context.setScale(udafc->resultType().scale); + context.setPrecision(udafc->resultType().precision); + + context.setParamCount(udafc->aggParms().size()); + ColumnDatum colType; + ColumnDatum colTypes[udafc->aggParms().size()]; + // Build the column type vector. 
+ // Modified for MCOL-1201 multi-argument aggregate + for (uint32_t i = 0; i < udafc->aggParms().size(); ++i) + { + const execplan::CalpontSystemCatalog::ColType& resultType + = udafc->aggParms()[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; + } + + // Call the user supplied init() + mcsv1sdk::mcsv1_UDAF* udaf = context.getFunction(); + if (!udaf) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "Aggregate Function " + context.getName() + " doesn't exist in the ColumnStore engine"; + if (ac) + delete ac; + return NULL; + } + if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR) + { + gwi.fatalParseError = true; + gwi.parseErrorText = udafc->getContext().getErrorMessage(); + if (ac) + delete ac; + return NULL; + } + + // UDAF_OVER_REQUIRED means that this function is for Window + // Function only. Reject it here in aggregate land. + if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, + context.getName()); + if (ac) + delete ac; + return NULL; + } + + // Set the return type as set in init() + CalpontSystemCatalog::ColType ct; + ct.colDataType = context.getResultType(); + ct.colWidth = context.getColWidth(); + ct.scale = context.getScale(); + ct.precision = context.getPrecision(); + udafc->resultType(ct); + } + } + + } + catch (std::logic_error e) { gwi.fatalParseError = true; - gwi.parseErrorText = "No project column found for aggregate function"; + gwi.parseErrorText = "error building Aggregate Function: "; + gwi.parseErrorText += e.what(); + if (ac) + delete ac; return NULL; } - else if (ac->constCol()) + catch (...) 
{ - gwi.count_asterisk_list.push_back(ac); + gwi.fatalParseError = true; + gwi.parseErrorText = "error building Aggregate Function: Unspecified exception"; + if (ac) + delete ac; + return NULL; } - - // For UDAF, populate the context and call the UDAF init() function. - if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) - { - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) - { - mcsv1Context& context = udafc->getContext(); - context.setName(isp->func_name()); - - // Set up the return type defaults for the call to init() - context.setResultType(udafc->resultType().colDataType); - context.setColWidth(udafc->resultType().colWidth); - context.setScale(udafc->resultType().scale); - context.setPrecision(udafc->resultType().precision); - - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - - // Build the column type vector. For now, there is only one - colTypes.push_back(make_pair(udafc->functionParms()->alias(), udafc->functionParms()->resultType().colDataType)); - - // Call the user supplied init() - if (context.getFunction()->init(&context, colTypes) == mcsv1_UDAF::ERROR) - { - gwi.fatalParseError = true; - gwi.parseErrorText = udafc->getContext().getErrorMessage(); - return NULL; - } - - if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) - { - gwi.fatalParseError = true; - gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, - context.getName()); - return NULL; - } - - // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; - ct.colDataType = context.getResultType(); - ct.colWidth = context.getColWidth(); - ct.scale = context.getScale(); - ct.precision = context.getPrecision(); - udafc->resultType(ct); - } - } - return ac; } @@ -7834,7 +7904,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } 
std::vector::iterator funciter; @@ -9898,7 +9968,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } std::vector::iterator funciter; diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 12fa74fa5..5ec4307a9 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -779,8 +779,11 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h //double double_val = *(double*)(&value); //f2->store(double_val); - if (f2->decimals() < (uint32_t)row.getScale(s)) - f2->dec = (uint32_t)row.getScale(s); + if ((f2->decimals() == DECIMAL_NOT_SPECIFIED && row.getScale(s) > 0) + || f2->decimals() < row.getScale(s)) + { + f2->dec = row.getScale(s); + } f2->store(dl); @@ -5273,7 +5276,6 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE execplan::CalpontSelectExecutionPlan::ColumnMap::iterator colMapIter; execplan::CalpontSelectExecutionPlan::ColumnMap::iterator condColMapIter; execplan::ParseTree* ptIt; - execplan::ReturnedColumn* rcIt; for(TABLE_LIST* tl = gi.groupByTables; tl; tl=tl->next_local) { mapiter = ci->tableMap.find(tl->table); diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 1635c815a..cf6abb6d6 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -340,6 +340,7 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n ac->distinct(item_sum->has_with_distinct()); Window_spec* win_spec = wf->window_spec; SRCP srcp; + CalpontSystemCatalog::ColType ct; // For return type // arguments vector funcParms; @@ -370,18 +371,25 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n context.setColWidth(rt.colWidth); context.setScale(rt.scale); context.setPrecision(rt.precision); + 
context.setParamCount(funcParms.size()); + + mcsv1sdk::ColumnDatum colType; + mcsv1sdk::ColumnDatum colTypes[funcParms.size()]; // Turn on the Analytic flag so the function is aware it is being called // as a Window Function. context.setContextFlag(CONTEXT_IS_ANALYTIC); - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - // Build the column type vector. + // Modified for MCOL-1201 multi-argument aggregate for (size_t i = 0; i < funcParms.size(); ++i) { - colTypes.push_back(make_pair(funcParms[i]->alias(), funcParms[i]->resultType().colDataType)); + const execplan::CalpontSystemCatalog::ColType& resultType + = funcParms[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; } // Call the user supplied init() @@ -401,7 +409,6 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n } // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; ct.colDataType = context.getResultType(); ct.colWidth = context.getColWidth(); ct.scale = context.getScale(); @@ -419,10 +426,10 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n { case Item_sum::UDF_SUM_FUNC: { - uint64_t bIgnoreNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)); - char sIgnoreNulls[18]; - sprintf(sIgnoreNulls, "%lu", bIgnoreNulls); - srcp.reset(new ConstantColumn(sIgnoreNulls, (uint64_t)bIgnoreNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 1 => RESPECT + uint64_t bRespectNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) ? 0 : 1; + char sRespectNulls[18]; + sprintf(sRespectNulls, "%lu", bRespectNulls); + srcp.reset(new ConstantColumn(sRespectNulls, (uint64_t)bRespectNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 
1 => RESPECT funcParms.push_back(srcp); break; } @@ -880,11 +887,13 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n return NULL; } - ac->resultType(colType_MysqlToIDB(item_sum)); - - // bug5736. Make the result type double for some window functions when - // infinidb_double_for_decimal_math is set. - ac->adjustResultType(); + if (item_sum->sum_func() != Item_sum::UDF_SUM_FUNC) + { + ac->resultType(colType_MysqlToIDB(item_sum)); + // bug5736. Make the result type double for some window functions when + // infinidb_double_for_decimal_math is set. + ac->adjustResultType(); + } ac->expressionId(ci->expressionId++); diff --git a/utils/common/any.hpp b/utils/common/any.hpp index be0ca679b..5408c5c87 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -9,123 +9,142 @@ * http://www.boost.org/LICENSE_1_0.txt */ +#include #include namespace static_any { namespace anyimpl { + struct bad_any_cast + { + }; - struct bad_any_cast - { - }; + struct empty_any + { + }; - struct empty_any - { - }; + struct base_any_policy + { + virtual void static_delete(void** x) = 0; + virtual void copy_from_value(void const* src, void** dest) = 0; + virtual void clone(void* const* src, void** dest) = 0; + virtual void move(void* const* src, void** dest) = 0; + virtual void* get_value(void** src) = 0; + virtual size_t get_size() = 0; + }; - struct base_any_policy - { - virtual void static_delete(void** x) = 0; - virtual void copy_from_value(void const* src, void** dest) = 0; - virtual void clone(void* const* src, void** dest) = 0; - virtual void move(void* const* src, void** dest) = 0; - virtual void* get_value(void** src) = 0; - virtual size_t get_size() = 0; - }; + template + struct typed_base_any_policy : base_any_policy + { + virtual size_t get_size() + { + return sizeof(T); + } + }; - template - struct typed_base_any_policy : base_any_policy - { - virtual size_t get_size() { return sizeof(T); } - }; + template + struct small_any_policy : 
typed_base_any_policy + { + virtual void static_delete(void** x) + { + } + virtual void copy_from_value(void const* src, void** dest) + { + new(dest) T(*reinterpret_cast(src)); + } + virtual void clone(void* const* src, void** dest) + { + *dest = *src; + } + virtual void move(void* const* src, void** dest) + { + *dest = *src; + } + virtual void* get_value(void** src) + { + return reinterpret_cast(src); + } + }; - template - struct small_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) { } - virtual void copy_from_value(void const* src, void** dest) - { new(dest) T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) { *dest = *src; } - virtual void move(void* const* src, void** dest) { *dest = *src; } - virtual void* get_value(void** src) { return reinterpret_cast(src); } - }; - - template - struct big_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) + template + struct big_any_policy : typed_base_any_policy + { + virtual void static_delete(void** x) { if (*x) - delete(*reinterpret_cast(x)); + delete(*reinterpret_cast(x)); *x = NULL; } - virtual void copy_from_value(void const* src, void** dest) + virtual void copy_from_value(void const* src, void** dest) { - *dest = new T(*reinterpret_cast(src)); + *dest = new T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) + virtual void clone(void* const* src, void** dest) { - *dest = new T(**reinterpret_cast(src)); + *dest = new T(**reinterpret_cast(src)); } - virtual void move(void* const* src, void** dest) + virtual void move(void* const* src, void** dest) { - (*reinterpret_cast(dest))->~T(); - **reinterpret_cast(dest) = **reinterpret_cast(src); + (*reinterpret_cast(dest))->~T(); + **reinterpret_cast(dest) = **reinterpret_cast(src); } - virtual void* get_value(void** src) { return *src; } - }; + virtual void* get_value(void** src) + { + return *src; + } + }; - template - struct choose_policy - { - typedef 
big_any_policy type; - }; + template + struct choose_policy + { + typedef big_any_policy type; + }; - template - struct choose_policy - { - typedef small_any_policy type; - }; + template + struct choose_policy + { + typedef small_any_policy type; + }; - struct any; + struct any; - /// Choosing the policy for an any type is illegal, but should never happen. - /// This is designed to throw a compiler error. - template<> - struct choose_policy - { - typedef void type; - }; + /// Choosing the policy for an any type is illegal, but should never happen. + /// This is designed to throw a compiler error. + template<> + struct choose_policy + { + typedef void type; + }; - /// Specializations for small types. - #define SMALL_POLICY(TYPE) template<> struct \ - choose_policy { typedef small_any_policy type; }; + /// Specializations for small types. +#define SMALL_POLICY(TYPE) template<> struct \ + choose_policy { typedef small_any_policy type; }; - SMALL_POLICY(char); - SMALL_POLICY(signed char); - SMALL_POLICY(unsigned char); - SMALL_POLICY(signed short); - SMALL_POLICY(unsigned short); - SMALL_POLICY(signed int); - SMALL_POLICY(unsigned int); - SMALL_POLICY(signed long); - SMALL_POLICY(unsigned long); - SMALL_POLICY(signed long long); - SMALL_POLICY(unsigned long long); - SMALL_POLICY(float); - SMALL_POLICY(double); - SMALL_POLICY(bool); + SMALL_POLICY(char); + SMALL_POLICY(signed char); + SMALL_POLICY(unsigned char); + SMALL_POLICY(signed short); + SMALL_POLICY(unsigned short); + SMALL_POLICY(signed int); + SMALL_POLICY(unsigned int); + SMALL_POLICY(signed long); + SMALL_POLICY(unsigned long); + SMALL_POLICY(signed long long); + SMALL_POLICY(unsigned long long); + SMALL_POLICY(float); + SMALL_POLICY(double); + SMALL_POLICY(bool); - #undef SMALL_POLICY +#undef SMALL_POLICY - /// This function will return a different policy for each type. 
- template - base_any_policy* get_policy() - { - static typename choose_policy::type policy; - return &policy; - }; + /// This function will return a different policy for each type. + template + base_any_policy* get_policy() + { + static typename choose_policy::type policy; + return &policy; + }; } class any @@ -139,37 +158,40 @@ public: /// Initializing constructor. template any(const T& x) - : policy(anyimpl::get_policy()), object(NULL) + : policy(anyimpl::get_policy()), object(NULL) { assign(x); } /// Empty constructor. any() - : policy(anyimpl::get_policy()), object(NULL) - { } + : policy(anyimpl::get_policy()), object(NULL) + { + } /// Special initializing constructor for string literals. any(const char* x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Copy constructor. any(const any& x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Destructor. - ~any() { + ~any() + { policy->static_delete(&object); } /// Assignment function from another any. - any& assign(const any& x) { + any& assign(const any& x) + { reset(); policy = x.policy; policy->clone(&x.object, &object); @@ -178,7 +200,8 @@ public: /// Assignment function. template - any& assign(const T& x) { + any& assign(const T& x) + { reset(); policy = anyimpl::get_policy(); policy->copy_from_value(&x, &object); @@ -197,8 +220,42 @@ public: return assign(x); } + /// Less than operator for sorting + bool operator<(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) < 0 ? 
1 : 0; + } + return 0; + } + + /// equal operator + bool operator==(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) == 0 ? 1 : 0; + } + return 0; + } + /// Utility functions - any& swap(any& x) { + uint8_t getHash() const + { + void* p1 = const_cast(object); + return *(uint64_t*)policy->get_value(&p1) % 4048; + } + any& swap(any& x) + { std::swap(policy, x.policy); std::swap(object, x.object); return *this; @@ -206,27 +263,32 @@ public: /// Cast operator. You can only cast to the original type. template - T& cast() { - if (policy != anyimpl::get_policy()) + T& cast() + { + if (policy != anyimpl::get_policy()) throw anyimpl::bad_any_cast(); T* r = reinterpret_cast(policy->get_value(&object)); return *r; } /// Returns true if the any contains no value. - bool empty() const { + bool empty() const + { return policy == anyimpl::get_policy(); } /// Frees any allocated memory, and sets the value to NULL. - void reset() { + void reset() + { policy->static_delete(&object); policy = anyimpl::get_policy(); } /// Returns true if the two types are the same. 
- bool compatible(const any& x) const { + bool compatible(const any& x) const + { return policy == x.policy; } }; + } diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 8d110cfc8..c1f5bbd63 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -215,6 +215,22 @@ inline string getStringNullValue() namespace rowgroup { +const std::string typeStr(""); +const static_any::any& RowAggregation::charTypeId((char)1); +const static_any::any& RowAggregation::scharTypeId((signed char)1); +const static_any::any& RowAggregation::shortTypeId((short)1); +const static_any::any& RowAggregation::intTypeId((int)1); +const static_any::any& RowAggregation::longTypeId((long)1); +const static_any::any& RowAggregation::llTypeId((long long)1); +const static_any::any& RowAggregation::ucharTypeId((unsigned char)1); +const static_any::any& RowAggregation::ushortTypeId((unsigned short)1); +const static_any::any& RowAggregation::uintTypeId((unsigned int)1); +const static_any::any& RowAggregation::ulongTypeId((unsigned long)1); +const static_any::any& RowAggregation::ullTypeId((unsigned long long)1); +const static_any::any& RowAggregation::floatTypeId((float)1); +const static_any::any& RowAggregation::doubleTypeId((double)1); +const static_any::any& RowAggregation::strTypeId(typeStr); + KeyStorage::KeyStorage(const RowGroup& keys, Row** tRow) : tmpRow(tRow), rg(keys) { RGData data(rg); @@ -691,7 +707,8 @@ RowAggregation::RowAggregation(const vector& rowAggGroupByCol RowAggregation::RowAggregation(const RowAggregation& rhs): fAggMapPtr(NULL), fRowGroupOut(NULL), fTotalRowCount(0), fMaxTotalRowCount(AGG_ROWGROUP_SIZE), - fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0) + fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0), + fRGContext(rhs.fRGContext) { //fGroupByCols.clear(); //fFunctionCols.clear(); @@ -756,7 +773,6 @@ void RowAggregation::addRowGroup(const RowGroup* pRows, vector& in { // this function is 
for threaded aggregation, which is for group by and distinct. // if (countSpecial(pRows)) - Row rowIn; pRows->initRow(&rowIn); @@ -790,7 +806,7 @@ void RowAggregation::setJoinRowGroups(vector* pSmallSideRG, RowGroup* } //------------------------------------------------------------------------------ -// For UDAF, we need to sometimes start a new context. +// For UDAF, we need to sometimes start a new fRGContext. // // This will be called any number of times by each of the batchprimitiveprocessor // threads on the PM and by multple threads on the UM. It must remain @@ -801,29 +817,29 @@ void RowAggregation::resetUDAF(uint64_t funcColID) // Get the UDAF class pointer and store in the row definition object. RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColID].get()); - // resetUDAF needs to be re-entrant. Since we're modifying the context object - // by creating a new userData, we need a local copy. The copy constructor - // doesn't copy userData. - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + // RowAggregation and it's functions need to be re-entrant which means + // each instance (thread) needs its own copy of the context object. + // Note: operator=() doesn't copy userData. + fRGContext = rowUDAF->fUDAFContext; // Call the user reset for the group userData. Since, at this point, // context's userData will be NULL, reset will generate a new one. 
mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->reset(&rgContext); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } fRow.setUserDataStore(fRowGroupOut->getRGData()->getUserDataStore()); - fRow.setUserData(rgContext, - rgContext.getUserDataSP(), - rgContext.getUserDataSize(), + fRow.setUserData(fRGContext, + fRGContext.getUserDataSP(), + fRGContext.getUserDataSize(), rowUDAF->fAuxColumnIndex); - rgContext.setUserData(NULL); // Prevents calling deleteUserData on the context. + fRGContext.setUserData(NULL); // Prevents calling deleteUserData on the fRGContext. } //------------------------------------------------------------------------------ @@ -873,7 +889,6 @@ void RowAggregation::initialize() } } - // Save the RowGroup data pointer fResultDataVec.push_back(fRowGroupOut->getRGData()); @@ -1658,10 +1673,11 @@ void RowAggregation::updateEntry(const Row& rowIn) { for (uint64_t i = 0; i < fFunctionCols.size(); i++) { - int64_t colIn = fFunctionCols[i]->fInputColumnIndex; - int64_t colOut = fFunctionCols[i]->fOutputColumnIndex; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[i]; + int64_t colIn = pFunctionCol->fInputColumnIndex; + int64_t colOut = pFunctionCol->fOutputColumnIndex; - switch (fFunctionCols[i]->fAggFunction) + switch (pFunctionCol->fAggFunction) { case ROWAGG_COUNT_COL_NAME: @@ -1675,7 +1691,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_MIN: case ROWAGG_MAX: case ROWAGG_SUM: - doMinMaxSum(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction); + doMinMaxSum(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; case ROWAGG_AVG: @@ -1692,7 +1708,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_BIT_OR: case ROWAGG_BIT_XOR: { - doBitOp(rowIn, 
colIn, colOut, fFunctionCols[i]->fAggFunction); + doBitOp(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; } @@ -1707,11 +1723,11 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); + RowUDAFFunctionCol* rowUDAF = dynamic_cast(pFunctionCol.get()); if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF); + doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF, i); } else { @@ -1725,7 +1741,7 @@ void RowAggregation::updateEntry(const Row& rowIn) { std::ostringstream errmsg; errmsg << "RowAggregation: function (id = " << - (uint64_t) fFunctionCols[i]->fAggFunction << ") is not supported."; + (uint64_t) pFunctionCol->fAggFunction << ") is not supported."; cerr << errmsg.str() << endl; throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); break; @@ -1997,131 +2013,142 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu } void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { - std::vector valsIn; - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupIn.getColTypes()[colIn]; - std::vector dataFlags; + int32_t paramCount = fRGContext.getParameterCount(); + // The vector of parameters to be sent to the UDAF + mcsv1sdk::ColumnDatum valsIn[paramCount]; + uint32_t dataFlags[paramCount]; - // Get the context for this rowGroup. Make a copy so we're thread safe. 
- mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); - - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - - if (isNull(&fRowGroupIn, rowIn, colIn) == true) + execplan::CalpontSystemCatalog::ColDataType colDataType; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + if (isNull(&fRowGroupIn, rowIn, colIn) == true) { - return; + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + { + return; + } + dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; + } + + colDataType = fRowGroupIn.getColTypes()[colIn]; + if (!fRGContext.isParamNull(i)) + { + switch (colDataType) + { + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + break; + } + + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UBIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + { + datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; + datum.columnData = rowIn.getDoubleField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + { + 
datum.dataType = execplan::CalpontSystemCatalog::FLOAT; + datum.columnData = rowIn.getFloatField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::TIME: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + { + datum.dataType = colDataType; + datum.columnData = rowIn.getStringField(colIn); + break; + } + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation " << fRGContext.getName() << + ": No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } } - flag |= mcsv1sdk::PARAM_IS_NULL; - } - - flags.push_back(flag); - rgContext.setDataFlags(&flags); - - mcsv1sdk::ColumnDatum datum; - - if (!rgContext.isParamNull(0)) - { - switch (colDataType) + // MCOL-1201: If there are multiple parameters, the next fFunctionCols + // will have the column used. By incrementing the funcColsIdx (passed by + // ref, we also increment the caller's index. 
+ if (fFunctionCols.size() > funcColsIdx + 1 + && fFunctionCols[funcColsIdx+1]->fAggFunction == ROWAGG_MULTI_PARM) { - case execplan::CalpontSystemCatalog::TINYINT: - case execplan::CalpontSystemCatalog::SMALLINT: - case execplan::CalpontSystemCatalog::MEDINT: - case execplan::CalpontSystemCatalog::INT: - case execplan::CalpontSystemCatalog::BIGINT: - case execplan::CalpontSystemCatalog::DECIMAL: - case execplan::CalpontSystemCatalog::UDECIMAL: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - datum.scale = fRowGroupIn.getScale()[colIn]; - datum.precision = fRowGroupIn.getPrecision()[colIn]; - break; - } - - case execplan::CalpontSystemCatalog::UTINYINT: - case execplan::CalpontSystemCatalog::USMALLINT: - case execplan::CalpontSystemCatalog::UMEDINT: - case execplan::CalpontSystemCatalog::UINT: - case execplan::CalpontSystemCatalog::UBIGINT: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DOUBLE: - case execplan::CalpontSystemCatalog::UDOUBLE: - { - datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; - datum.columnData = rowIn.getDoubleField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::FLOAT: - case execplan::CalpontSystemCatalog::UFLOAT: - { - datum.dataType = execplan::CalpontSystemCatalog::FLOAT; - datum.columnData = rowIn.getFloatField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DATE: - case execplan::CalpontSystemCatalog::DATETIME: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::TIME: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::CHAR: - case execplan::CalpontSystemCatalog::VARCHAR: - case 
execplan::CalpontSystemCatalog::TEXT: - case execplan::CalpontSystemCatalog::VARBINARY: - case execplan::CalpontSystemCatalog::CLOB: - case execplan::CalpontSystemCatalog::BLOB: - { - datum.dataType = colDataType; - datum.columnData = rowIn.getStringField(colIn); - break; - } - - default: - { - std::ostringstream errmsg; - errmsg << "RowAggregation " << rgContext.getName() << - ": No logic for data type: " << colDataType; - throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); - break; - } + ++funcColsIdx; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; + colIn = pFunctionCol->fInputColumnIndex; + colOut = pFunctionCol->fOutputColumnIndex; + } + else + { + break; } } - valsIn.push_back(datum); - // The intermediate values are stored in userData referenced by colAux. - rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setDataFlags(dataFlags); + fRGContext.setUserData(fRow.getUserData(colAux)); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->nextValue(&rgContext, valsIn); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -2218,6 +2245,7 @@ RowAggregationUM::RowAggregationUM(const RowAggregationUM& rhs) : fHasAvg(rhs.fHasAvg), fKeyOnHeap(rhs.fKeyOnHeap), fHasStatsFunc(rhs.fHasStatsFunc), + fHasUDAF(rhs.fHasUDAF), fExpression(rhs.fExpression), fTotalMemUsage(rhs.fTotalMemUsage), fRm(rhs.fRm), @@ -2419,7 +2447,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -2585,22 +2613,6 @@ void RowAggregationUM::calculateAvgColumns() // Sets the value 
from valOut into column colOut, performing any conversions. void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) { - static const static_any::any& charTypeId((char)1); - static const static_any::any& scharTypeId((signed char)1); - static const static_any::any& shortTypeId((short)1); - static const static_any::any& intTypeId((int)1); - static const static_any::any& longTypeId((long)1); - static const static_any::any& llTypeId((long long)1); - static const static_any::any& ucharTypeId((unsigned char)1); - static const static_any::any& ushortTypeId((unsigned short)1); - static const static_any::any& uintTypeId((unsigned int)1); - static const static_any::any& ulongTypeId((unsigned long)1); - static const static_any::any& ullTypeId((unsigned long long)1); - static const static_any::any& floatTypeId((float)1); - static const static_any::any& doubleTypeId((double)1); - static const std::string typeStr(""); - static const static_any::any& strTypeId(typeStr); - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; if (valOut.empty()) @@ -2609,6 +2621,179 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) return; } + int64_t intOut = 0; + uint64_t uintOut = 0; + float floatOut = 0.0; + double doubleOut = 0.0; + ostringstream oss; + std::string strOut; + + bool bSetSuccess = false; + switch (colDataType) + { + case execplan::CalpontSystemCatalog::BIT: + case execplan::CalpontSystemCatalog::TINYINT: + if (valOut.compatible(charTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(scharTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<1>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + if (valOut.compatible(shortTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<2>(intOut, colOut); + bSetSuccess = true; 
+ } + break; + + case execplan::CalpontSystemCatalog::INT: + if (valOut.compatible(uintTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(longTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<4>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + if (valOut.compatible(llTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<8>(intOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UTINYINT: + if (valOut.compatible(ucharTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<1>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + if (valOut.compatible(ushortTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<2>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UINT: + if (valOut.compatible(uintTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<4>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UBIGINT: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + if (valOut.compatible(floatTypeId)) + { + floatOut = valOut.cast(); + fRow.setFloatField(floatOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DOUBLE: + case 
execplan::CalpontSystemCatalog::UDOUBLE: + if (valOut.compatible(doubleTypeId)) + { + doubleOut = valOut.cast(); + fRow.setDoubleField(doubleOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setStringField(strOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setVarBinaryField(strOut, colOut); + bSetSuccess = true; + } + break; + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation: No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } + if (!bSetSuccess) + { + SetUDAFAnyValue(valOut, colOut); + } +} + +void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut) +{ + execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; + // This may seem a bit convoluted. Users shouldn't return a type // that they didn't set in mcsv1_UDAF::init(), but this // handles whatever return type is given and casts @@ -2814,7 +2999,7 @@ void RowAggregationUM::calculateUDAFColumns() continue; rowUDAF = dynamic_cast(fFunctionCols[i].get()); - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + fRGContext = rowUDAF->fUDAFContext; int64_t colOut = rowUDAF->fOutputColumnIndex; int64_t colAux = rowUDAF->fAuxColumnIndex; @@ -2826,26 +3011,26 @@ void RowAggregationUM::calculateUDAFColumns() fRowGroupOut->getRow(j, &fRow); // Turn the NULL flag off. We can't know NULL at this point - rgContext.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF evaluate function mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->evaluate(&rgContext, valOut); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); } - rgContext.setUserData(NULL); + fRGContext.setUserData(NULL); } } @@ -3116,54 +3301,60 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u { // For a NULL constant, call nextValue with NULL and then evaluate. bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. 
- rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } - +#if 0 + uint32_t dataFlags[fRGContext.getParameterCount()]; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + { + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + } +#endif // Turn the NULL and CONSTANT flags on. - std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a dummy datum - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = execplan::CalpontSystemCatalog::BIGINT; datum.columnData = 0; - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), 
logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3460,30 +3651,28 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData case ROWAGG_UDAF: { bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. - rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Turn the CONSTANT flags on. 
- std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a datum item for sending to UDAF - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = (CalpontSystemCatalog::ColDataType)colDataType; switch (colDataType) @@ -3567,27 +3756,27 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData break; } - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3806,7 +3995,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -4011,45 +4200,43 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance 
//------------------------------------------------------------------------------ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { static_any::any valOut; - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); // Get the user data boost::shared_ptr userData = rowIn.getUserData(colIn + 1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. - std::vector flags; - uint32_t flag = 0; + uint32_t flags[1]; + flags[0] = 0; if (!userData) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { return; } // Turn on NULL flags - flag |= mcsv1sdk::PARAM_IS_NULL; + flags[0] |= mcsv1sdk::PARAM_IS_NULL; } - flags.push_back(flag); - rgContext.setDataFlags(&flags); + fRGContext.setDataFlags(flags); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF subEvaluate method mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->subEvaluate(&rgContext, userData.get()); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userData.get()); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::IDBExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -4246,7 +4433,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index b6294f193..282f354fc 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -110,6 +110,9 @@ enum RowAggFunctionType // User Defined Aggregate Function ROWAGG_UDAF, + // If an Aggregate has more than one parameter, this will be used for parameters after the first + ROWAGG_MULTI_PARM, + // internal function type to avoid duplicate the work // handling ROWAGG_COUNT_NO_OP, ROWAGG_DUP_FUNCT and ROWAGG_DUP_AVG is a little different // ROWAGG_COUNT_NO_OP : count done by AVG, no need to copy @@ -583,7 +586,7 @@ protected: virtual void doAvg(const Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); - virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); @@ -660,6 +663,25 @@ 
protected: //need access to rowgroup storage holding the rows to hash & ==. friend class AggHasher; friend class AggComparator; + + // We need a separate copy for each thread. + mcsv1sdk::mcsv1Context fRGContext; + + // These are handy for testing the actual type of static_any for UDAF + static const static_any::any& charTypeId; + static const static_any::any& scharTypeId; + static const static_any::any& shortTypeId; + static const static_any::any& intTypeId; + static const static_any::any& longTypeId; + static const static_any::any& llTypeId; + static const static_any::any& ucharTypeId; + static const static_any::any& ushortTypeId; + static const static_any::any& uintTypeId; + static const static_any::any& ulongTypeId; + static const static_any::any& ullTypeId; + static const static_any::any& floatTypeId; + static const static_any::any& doubleTypeId; + static const static_any::any& strTypeId; }; //------------------------------------------------------------------------------ @@ -783,6 +805,9 @@ protected: // Sets the value from valOut into column colOut, performing any conversions. void SetUDAFValue(static_any::any& valOut, int64_t colOut); + // If the datatype returned by evaluate isn't what we expect, convert. + void SetUDAFAnyValue(static_any::any& valOut, int64_t colOut); + // calculate the UDAF function all rows received. UM only function. 
void calculateUDAFColumns(); @@ -877,7 +902,7 @@ protected: void doStatistics(const Row&, int64_t, int64_t, int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); - void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); bool countSpecial(const RowGroup* pRG) { return false; diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index e69ff4d88..01009e35a 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/udfsdk/allnull.cpp b/utils/udfsdk/allnull.cpp index b6b8d79da..247b9e28f 100644 --- a/utils/udfsdk/allnull.cpp +++ b/utils/udfsdk/allnull.cpp @@ -27,11 +27,11 @@ struct allnull_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode allnull::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { context->setUserDataSize(sizeof(allnull_data)); - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -52,8 +52,7 @@ mcsv1_UDAF::ReturnCode allnull::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { struct allnull_data* data = (struct allnull_data*)context->getUserData()->data; diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h index 86697b052..da17f5d6b 100644 
--- a/utils/udfsdk/allnull.h +++ b/utils/udfsdk/allnull.h @@ -103,7 +103,7 @@ public: * colTypes or wrong number of arguments. Else return * mcsv1_UDAF::SUCCESS. */ - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); /** * reset() @@ -138,7 +138,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() diff --git a/utils/udfsdk/avg_mode.cpp b/utils/udfsdk/avg_mode.cpp index f39b5e402..5429183d9 100644 --- a/utils/udfsdk/avg_mode.cpp +++ b/utils/udfsdk/avg_mode.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("avg_mode() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode avg_mode::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; @@ -187,8 +186,7 @@ mcsv1_UDAF::ReturnCode 
avg_mode::evaluate(mcsv1Context* context, static_any::any return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/avg_mode.h b/utils/udfsdk/avg_mode.h index 4f3442005..5722c5fea 100644 --- a/utils/udfsdk/avg_mode.h +++ b/utils/udfsdk/avg_mode.h @@ -18,7 +18,7 @@ /*********************************************************************** * $Id$ * -* mcsv1_UDAF.h +* avg_mode.h ***********************************************************************/ /** @@ -50,8 +50,8 @@ * is also used to describe the interface that is used for * either. */ -#ifndef HEADER_mode -#define HEADER_mode +#ifndef HEADER_avg_mode +#define HEADER_avg_mode #include #include @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index 349a642ec..ee08dcc07 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -36,6 +36,8 @@ UDAF_MAP UDAFMap::fm; #include "ssq.h" #include "median.h" #include "avg_mode.h" +#include "regr_avgx.h" +#include "avgx.h" UDAF_MAP& UDAFMap::getMap() { if (fm.size() > 0) @@ -52,6 +54,8 @@ UDAF_MAP& UDAFMap::getMap() fm["ssq"] = new ssq(); fm["median"] = new median(); fm["avg_mode"] = new avg_mode(); + fm["regr_avgx"] = new regr_avgx(); + fm["avgx"] = new avgx(); return fm; } @@ -115,8 +119,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const { // We don't test the per row data fields. They don't determine // if it's the same Context. - if (getName() != c.getName() - || fRunFlags != c.fRunFlags + if (getName() != c.getName() + ||fRunFlags != c.fRunFlags || fContextFlags != c.fContextFlags || fUserDataSize != c.fUserDataSize || fResultType != c.fResultType @@ -125,7 +129,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const || fStartFrame != c.fStartFrame || fEndFrame != c.fEndFrame || fStartConstant != c.fStartConstant - || fEndConstant != c.fEndConstant) + || fEndConstant != c.fEndConstant + || fParamCount != c.fParamCount) return false; return true; @@ -217,6 +222,7 @@ void mcsv1Context::serialize(messageqcpp::ByteStream& b) const b << (uint32_t)fEndFrame; b << fStartConstant; b << fEndConstant; + b << fParamCount; } void mcsv1Context::unserialize(messageqcpp::ByteStream& b) @@ -238,6 +244,7 @@ void mcsv1Context::unserialize(messageqcpp::ByteStream& b) fEndFrame = (WF_FRAME)frame; b >> fStartConstant; b >> fEndConstant; + b >> fParamCount; } void UserData::serialize(messageqcpp::ByteStream& bs) const diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h 
index d24852c28..df3f47649 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -77,6 +77,7 @@ #include "any.hpp" #include "calpontsystemcatalog.h" #include "wf_frame.h" +#include "my_decimal_limits.h" using namespace execplan; @@ -200,12 +201,8 @@ static uint64_t CONTEXT_IS_PM __attribute__ ((unused)) = 1 << 2; // Flags that describe the contents of a specific input parameter // These will be set in context->dataFlags for each method call by the framework. // User code shouldn't use these directly -static uint64_t PARAM_IS_NULL __attribute__ ((unused)) = 1; -static uint64_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; - -// shorthand for the list of columns in the call sent to init() -// first is the actual column name and second is the data type in Columnstore. -typedef std::vector >COL_TYPES; +static uint32_t PARAM_IS_NULL __attribute__ ((unused)) = 1; +static uint32_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; // This is the context class that is passed to all API callbacks // The framework potentially sets data here for each invocation of @@ -269,7 +266,9 @@ public: EXPORT bool isPM(); // Parameter refinement description accessors - // valid in nextValue and dropValue + + // How many actual parameters were entered. + // valid in all calls size_t getParameterCount() const; // Determine if an input parameter is NULL @@ -298,6 +297,7 @@ public: // This only makes sense if the return type is decimal, but should be set // to (0, -1) for other types if the inout is decimal. // valid in init() + // Set the scale to DECIMAL_NOT_SPECIFIED if you want a floating decimal. 
EXPORT bool setScale(int32_t scale); EXPORT bool setPrecision(int32_t precision); @@ -372,7 +372,7 @@ private: int32_t fResultscale; // For scale, the number of digits to the right of the decimal int32_t fResultPrecision; // The max number of digits allowed in the decimal value std::string errorMsg; - std::vector* dataFlags; // one entry for each parameter + uint32_t* dataFlags; // an integer array wirh one entry for each parameter bool* bInterrupted; // Gets set to true by the Framework if something happens WF_FRAME fStartFrame; // Is set to default to start, then modified by the actual frame in the call WF_FRAME fEndFrame; // Is set to default to start, then modified by the actual frame in the call @@ -380,6 +380,7 @@ private: int32_t fEndConstant; // for end frame WF_PRECEEDIMG or WF_FOLLOWING std::string functionName; mcsv1sdk::mcsv1_UDAF* func; + int32_t fParamCount; public: // For use by the framework @@ -394,13 +395,14 @@ public: EXPORT void clearContextFlag(uint64_t flag); EXPORT uint64_t getContextFlags() const; EXPORT uint32_t getUserDataSize() const; - EXPORT std::vector& getDataFlags(); - EXPORT void setDataFlags(std::vector* flags); + EXPORT uint32_t* getDataFlags(); + EXPORT void setDataFlags(uint32_t* flags); EXPORT void setInterrupted(bool interrupted); EXPORT void setInterrupted(bool* interrupted); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction(); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const; EXPORT boost::shared_ptr getUserDataSP(); + EXPORT void setParamCount(int32_t paramCount); }; // Since aggregate functions can operate on any data type, we use the following structure @@ -419,9 +421,10 @@ public: struct ColumnDatum { CalpontSystemCatalog::ColDataType dataType; // defined in calpontsystemcatalog.h - static_any::any columnData; + static_any::any columnData; // Not valid in init() uint32_t scale; // If dataType is a DECIMAL type uint32_t precision; // If dataType is a DECIMAL type + std::string alias; // Only filled in for init() ColumnDatum() : 
dataType(CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1) {}; }; @@ -466,7 +469,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes) = 0; + ColumnDatum* colTypes) = 0; /** * reset() @@ -501,8 +504,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn) = 0; + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn) = 0; /** * subEvaluate() @@ -579,8 +581,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() @@ -640,32 +641,32 @@ inline mcsv1Context::mcsv1Context() : fEndFrame(WF_CURRENT_ROW), fStartConstant(0), fEndConstant(0), - func(NULL) + func(NULL), + fParamCount(0) { } inline mcsv1Context::mcsv1Context(const mcsv1Context& rhs) : - fContextFlags(0), - fColWidth(0), - dataFlags(NULL), - bInterrupted(NULL), - func(NULL) + dataFlags(NULL) { copy(rhs); } inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs) { - fRunFlags = rhs.getRunFlags(); - fResultType = rhs.getResultType(); - fUserDataSize = rhs.getUserDataSize(); - fResultscale = rhs.getScale(); - fResultPrecision = rhs.getPrecision(); + fRunFlags = rhs.fRunFlags; + fContextFlags = rhs.fContextFlags; + fResultType = rhs.fResultType; + fUserDataSize = rhs.fUserDataSize; + fColWidth = rhs.fColWidth; + fResultscale = rhs.fResultscale; + fResultPrecision = rhs.fResultPrecision; rhs.getStartFrame(fStartFrame, fStartConstant); rhs.getEndFrame(fEndFrame, fEndConstant); - functionName = rhs.getName(); - bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference - func = rhs.func; + functionName = rhs.functionName; + bInterrupted = rhs.bInterrupted; // Multiple 
threads will use the same reference + func = rhs.func; + fParamCount = rhs.fParamCount; return *this; } @@ -675,11 +676,7 @@ inline mcsv1Context::~mcsv1Context() inline mcsv1Context& mcsv1Context::operator=(const mcsv1Context& rhs) { - fContextFlags = 0; - fColWidth = 0; dataFlags = NULL; - bInterrupted = NULL; - func = NULL; return copy(rhs); } @@ -753,16 +750,13 @@ inline bool mcsv1Context::isPM() inline size_t mcsv1Context::getParameterCount() const { - if (dataFlags) - return dataFlags->size(); - - return 0; + return fParamCount; } inline bool mcsv1Context::isParamNull(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_NULL; + return dataFlags[paramIdx] & PARAM_IS_NULL; return false; } @@ -770,7 +764,7 @@ inline bool mcsv1Context::isParamNull(int paramIdx) inline bool mcsv1Context::isParamConstant(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_CONSTANT; + return dataFlags[paramIdx] & PARAM_IS_CONSTANT; return false; } @@ -939,18 +933,22 @@ inline uint32_t mcsv1Context::getUserDataSize() const return fUserDataSize; } -inline std::vector& mcsv1Context::getDataFlags() +inline uint32_t* mcsv1Context::getDataFlags() { - return *dataFlags; + return dataFlags; } -inline void mcsv1Context::setDataFlags(std::vector* flags) +inline void mcsv1Context::setDataFlags(uint32_t* flags) { dataFlags = flags; } -inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, - std::vector& valsDropped) +inline void mcsv1Context::setParamCount(int32_t paramCount) +{ + fParamCount = paramCount; +} + +inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { return NOT_IMPLEMENTED; } diff --git a/utils/udfsdk/median.cpp b/utils/udfsdk/median.cpp index e32d721f1..9c7e72dc3 100644 --- a/utils/udfsdk/median.cpp +++ b/utils/udfsdk/median.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* 
colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("median() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode median::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; @@ -212,8 +211,7 @@ mcsv1_UDAF::ReturnCode median::evaluate(mcsv1Context* context, static_any::any& return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h index d64792461..142be6ba8 100644 --- a/utils/udfsdk/median.h +++ b/utils/udfsdk/median.h @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. 
*/ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/ssq.cpp b/utils/udfsdk/ssq.cpp index 4d9ef7e10..20fdc33db 100644 --- a/utils/udfsdk/ssq.cpp +++ b/utils/udfsdk/ssq.cpp @@ -34,9 +34,9 @@ struct ssq_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -44,13 +44,13 @@ mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("ssq() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -81,8 +81,7 @@ mcsv1_UDAF::ReturnCode ssq::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; @@ -183,8 +182,7 @@ mcsv1_UDAF::ReturnCode ssq::evaluate(mcsv1Context* context, static_any::any& val return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode 
ssq::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h index 514c7a3f0..2cac61c2c 100644 --- a/utils/udfsdk/ssq.h +++ b/utils/udfsdk/ssq.h @@ -114,7 +114,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -147,8 +147,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -224,8 +223,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); protected: }; diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp index 981651c43..dc0277ccc 100644 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -490,6 +490,168 @@ extern "C" // return data->sumsq; return 0; } + +//======================================================================= + + /** + * regr_avgx connector stub + */ + struct regr_avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_avgx_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_avgx() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + 
data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x and y + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } + +//======================================================================= + + /** + * avgx connector stub. Exactly the same functionality as the + * built in avg() function. 
Use to test the performance of the + * API + */ + struct avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct avgx_data* data; + if (args->arg_count != 1) + { + strcpy(message,"avgx() requires one argument"); + return 1; + } + + if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x and y + struct avgx_data* data = (struct avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } } // vim:ts=4 sw=4: diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index 664b0e7de..fe1f3fd0e 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -204,8 +204,10 @@ Filters="*.c;*.C;*.cc;*.cpp;*.cp;*.cxx;*.c++;*.prg;*.pas;*.dpr;*.asm;*.s;*.bas;*.java;*.cs;*.sc;*.e;*.cob;*.html;*.rc;*.tcl;*.py;*.pl;*.d"> + + @@ -215,8 +217,10 @@ 
Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + + diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index f302c49cd..5cd5243c5 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -52,6 +52,7 @@ using namespace joblist; namespace windowfunction { + template boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) { @@ -142,7 +143,7 @@ template void WF_udaf::resetData() { getContext().getFunction()->reset(&getContext()); - fSet.clear(); + fDistinctSet.clear(); WindowFunctionType::resetData(); } @@ -150,8 +151,8 @@ template void WF_udaf::parseParms(const std::vector& parms) { bRespectNulls = true; - // parms[1]: respect null | ignore null - ConstantColumn* cc = dynamic_cast(parms[1].get()); + // The last parms: respect null | ignore null + ConstantColumn* cc = dynamic_cast(parms[parms.size()-1].get()); idbassert(cc != NULL); bool isNull = false; // dummy, harded coded bRespectNulls = (cc->getIntVal(fRow, isNull) > 0); @@ -167,52 +168,71 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) } mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - uint64_t colOut = fFieldIndex[0]; - uint64_t colIn = fFieldIndex[1]; - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. 
+ getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); + + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } for (int64_t i = b; i < e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; + bool bHasNull = false; fRow.setData(getPointer(fRowData->at(i))); // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; + uint32_t flags[getContext().getParameterCount()]; - if (fRow.isNullValue(colIn) == true) + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) { - continue; + if (!bRespectNulls) + { + bHasNull = true; + break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - flag |= mcsv1sdk::PARAM_IS_NULL; + T valIn; + getValue(colIn, valIn, &datum.dataType); + + // Check for distinct, if turned on. + // Currently, distinct only works for param 1 + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. - // TODO: when we impliment distinct, we need to revist this. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) + if (bHasNull) { continue; } - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - rc = getContext().getFunction()->dropValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED) @@ -442,59 +462,191 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) else if (fPrev <= e && fPrev > c) e = c; - uint64_t colIn = fFieldIndex[1]; + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. + getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } if (b <= c && c <= e) getContext().setContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); else getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); - + bool bHasNull = false; for (int64_t i = b; i <= e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; fRow.setData(getPointer(fRowData->at(i))); - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - if (fRow.isNullValue(colIn) == true) + // NULL flags + uint32_t flags[getContext().getParameterCount()]; + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + + // Turn on Null flags or skip based on respect nulls + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) + { + if (!bRespectNulls) + { + bHasNull = true; + 
break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; + } + + // MCOL-1201 Multi-Paramter calls + switch (datum.dataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + { + int64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::UDECIMAL: + { + uint64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + double valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + float valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[i] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } + } + // Skip if any value is NULL and respect nulls is off. + if (bHasNull) { continue; } - - flag |= mcsv1sdk::PARAM_IS_NULL; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) - { - continue; - } - - if (fDistinct) - fSet.insert(valIn); - - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - + getContext().setDataFlags(flags); + rc = getContext().getFunction()->nextValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index babb32565..f7a4c4b08 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -21,13 +21,35 @@ #ifndef UTILS_WF_UDAF_H #define UTILS_WF_UDAF_H -#include +#ifndef _MSC_VER +#include +#else +#include +#endif #include "windowfunctiontype.h" #include "mcsv1_udaf.h" namespace windowfunction { +// Hash classes for the distinct hashmap +class DistinctHasher +{ +public: + inline size_t operator()(const static_any::any& a) const + { + return a.getHash(); + } +}; + +class DistinctEqual +{ +public: + inline bool operator()(const static_any::any& lhs, static_any::any& rhs) const + { + return lhs == rhs; + } +}; // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF @@ -72,7 +94,8 @@ protected: bool fDistinct; bool bRespectNulls; // respect null | ignore null bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue. 
- std::set fSet; // To hold distinct values + // To hold distinct values + std::tr1::unordered_set fDistinctSet; static_any::any fValOut; // The return value public: diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index 950045899..4c5b4de32 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -492,10 +492,10 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) static uint64_t dateNull = joblist::DATENULL; static uint64_t datetimeNull = joblist::DATETIMENULL; static uint64_t timeNull = joblist::TIMENULL; - static uint64_t char1Null = joblist::CHAR1NULL; - static uint64_t char2Null = joblist::CHAR2NULL; - static uint64_t char4Null = joblist::CHAR4NULL; - static uint64_t char8Null = joblist::CHAR8NULL; +// static uint64_t char1Null = joblist::CHAR1NULL; +// static uint64_t char2Null = joblist::CHAR2NULL; +// static uint64_t char4Null = joblist::CHAR4NULL; +// static uint64_t char8Null = joblist::CHAR8NULL; static string stringNull(""); void* v = NULL; diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 5d3dfec85..41c788693 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1280,7 +1280,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -2024,10 +2024,10 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 0; k < colStructList.size(); k++) + for (size_t k = 0; k < colStructList.size(); k++) { // Skip the selected column 
- if (k == colId) + if (k == (size_t)colId) continue; Column expandCol; @@ -2582,7 +2582,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -3277,7 +3277,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; From 30f9aa71cd1c14b3dc612ab9c518655669d3090d Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:53:49 -0500 Subject: [PATCH 005/123] MCOL-1201 Add test UDAF back in after rebase --- utils/udfsdk/avgx.cpp | 257 +++++++++++++++++++++++++++++++++++ utils/udfsdk/avgx.h | 99 ++++++++++++++ utils/udfsdk/regr_avgx.cpp | 270 +++++++++++++++++++++++++++++++++++++ utils/udfsdk/regr_avgx.h | 99 ++++++++++++++ 4 files changed, 725 insertions(+) create mode 100644 utils/udfsdk/avgx.cpp create mode 100644 utils/udfsdk/avgx.h create mode 100644 utils/udfsdk/regr_avgx.cpp create mode 100644 utils/udfsdk/regr_avgx.h diff --git a/utils/udfsdk/avgx.cpp b/utils/udfsdk/avgx.cpp new file mode 100644 index 000000000..887a8418e --- /dev/null +++ b/utils/udfsdk/avgx.cpp @@ -0,0 +1,257 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include +#include "avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 1) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with other than 1 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[0].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[0].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode avgx::reset(mcsv1Context* context) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_x = valsIn[0].columnData; + struct avgx_data* data = (struct 
avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct avgx_data* outData = (struct avgx_data*)context->getUserData()->data; + struct avgx_data* inData = (struct avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + + valOut = data->sum / (double)data->cnt; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_x = valsDropped[0].columnData; + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/avgx.h b/utils/udfsdk/avgx.h new file mode 100644 index 000000000..0569b6091 --- /dev/null +++ b/utils/udfsdk/avgx.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* avgx.h +***********************************************************************/ + +/** + * Columnstore interface for for the avgx function + * + * + * CREATE AGGREGATE FUNCTION avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_avgx +#define HEADER_avgx + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+ +// Return the avgx value of the dataset + +class avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + avgx() : mcsv1_UDAF() {}; + virtual ~avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_.h + diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/udfsdk/regr_avgx.cpp new file mode 100644 index 000000000..c7cc5b56e --- /dev/null +++ b/utils/udfsdk/regr_avgx.cpp @@ -0,0 +1,270 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include +#include +#include +#include "regr_avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct regr_avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode regr_avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 2) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with other than 2 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(regr_avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[1].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode regr_avgx::reset(mcsv1Context* context) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_y = valsIn[0].columnData; + static_any::any& valIn_x = valsIn[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (context->isParamNull(0) || context->isParamNull(1)) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct regr_avgx_data* outData = (struct regr_avgx_data*)context->getUserData()->data; + struct regr_avgx_data* inData = (struct regr_avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + + if (data->cnt == 0) + { + valOut = 0; + } + else + { + valOut = data->sum / (double)data->cnt; + } + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_y = valsDropped[0].columnData; + static_any::any& valIn_x = valsDropped[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/regr_avgx.h b/utils/udfsdk/regr_avgx.h new file mode 100644 index 000000000..f70f30d8c --- /dev/null +++ b/utils/udfsdk/regr_avgx.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* regr_avgx.h +***********************************************************************/ + +/** + * Columnstore interface for for the regr_avgx function + * + * + * CREATE AGGREGATE FUNCTION regr_avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_regr_avgx +#define HEADER_regr_avgx + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+ +// Return the regr_avgx value of the dataset + +class regr_avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + regr_avgx() : mcsv1_UDAF() {}; + virtual ~regr_avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_regr_avgx.h + From 5b1f5d5fe400b00a83a9b38ac66b7d7b34d1640a Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Mon, 14 May 2018 22:03:25 +0100 Subject: [PATCH 006/123] MCOL-1412 Ubuntu 18.04 support Backport Ubuntu 18.04 support to 1.1 --- dbcon/joblist/tupleunion.cpp | 6 +++--- utils/common/any.hpp | 28 +++++++++++++++++++--------- utils/common/cgroupconfigurator.cpp | 1 + utils/threadpool/threadpool.cpp | 1 + 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/dbcon/joblist/tupleunion.cpp b/dbcon/joblist/tupleunion.cpp index 2fdd4330f..2ae631abb 100644 --- a/dbcon/joblist/tupleunion.cpp +++ b/dbcon/joblist/tupleunion.cpp @@ -47,7 +47,7 @@ using namespace dataconvert; #endif namespace { //returns the value of 10 raised to the power x. 
-inline double pow10(double x) +inline double exp10(double x) { return exp(x * M_LN10); } @@ -406,7 +406,7 @@ void TupleUnion::normalize(const Row &in, Row *out) ostringstream os; if (in.getScale(i)) { double d = in.getIntField(i); - d /= pow10(in.getScale(i)); + d /= exp10(in.getScale(i)); os.precision(15); os << d; } @@ -488,7 +488,7 @@ dec1: uint64_t val = in.getIntField(i); ostringstream os; if (in.getScale(i)) { double d = in.getUintField(i); - d /= pow10(in.getScale(i)); + d /= exp10(in.getScale(i)); os.precision(15); os << d; } diff --git a/utils/common/any.hpp b/utils/common/any.hpp index 5265015f1..be0ca679b 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -54,15 +54,25 @@ namespace anyimpl template struct big_any_policy : typed_base_any_policy { - virtual void static_delete(void** x) { if (*x) - delete(*reinterpret_cast(x)); *x = NULL; } - virtual void copy_from_value(void const* src, void** dest) { - *dest = new T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) { - *dest = new T(**reinterpret_cast(src)); } - virtual void move(void* const* src, void** dest) { - (*reinterpret_cast(dest))->~T(); - **reinterpret_cast(dest) = **reinterpret_cast(src); } + virtual void static_delete(void** x) + { + if (*x) + delete(*reinterpret_cast(x)); + *x = NULL; + } + virtual void copy_from_value(void const* src, void** dest) + { + *dest = new T(*reinterpret_cast(src)); + } + virtual void clone(void* const* src, void** dest) + { + *dest = new T(**reinterpret_cast(src)); + } + virtual void move(void* const* src, void** dest) + { + (*reinterpret_cast(dest))->~T(); + **reinterpret_cast(dest) = **reinterpret_cast(src); + } virtual void* get_value(void** src) { return *src; } }; diff --git a/utils/common/cgroupconfigurator.cpp b/utils/common/cgroupconfigurator.cpp index a4a67d68e..fcf7ef0e9 100644 --- a/utils/common/cgroupconfigurator.cpp +++ b/utils/common/cgroupconfigurator.cpp @@ -19,6 +19,7 @@ #include "configcpp.h" #include 
"logger.h" #include +#include #include #ifdef _MSC_VER #include "unistd.h" diff --git a/utils/threadpool/threadpool.cpp b/utils/threadpool/threadpool.cpp index 197bdedd9..d903f9892 100644 --- a/utils/threadpool/threadpool.cpp +++ b/utils/threadpool/threadpool.cpp @@ -21,6 +21,7 @@ * ***********************************************************************/ #include +#include using namespace std; #include "messageobj.h" From 6ccfbb2a236f8106b93734d018899f3653d9b526 Mon Sep 17 00:00:00 2001 From: David Hall Date: Mon, 14 May 2018 17:28:24 -0500 Subject: [PATCH 007/123] MCOL-1201 some fixes from testing --- dbcon/joblist/tupleaggregatestep.cpp | 229 ++++++++++++--------------- dbcon/mysql/ha_calpont_execplan.cpp | 1 - utils/common/common.vpj | 2 + utils/rowgroup/rowaggregation.cpp | 4 +- 4 files changed, 106 insertions(+), 130 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 21c7c0af6..00fa26a4c 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -852,7 +852,6 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) if (ac->aggOp() == ROWAGG_UDAF) { UDAFColumn* udafc = dynamic_cast(ac); - if (udafc) { constAggDataVec.push_back( @@ -1097,8 +1096,9 @@ void TupleAggregateStep::prep1PhaseAggregate( vector functionVec; uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); + // For UDAF uint32_t projColsUDAFIndex = 0; - + UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function map avgFuncMap; @@ -1287,12 +1287,10 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = 
udafc->getContext().getFunction(); @@ -1300,12 +1298,10 @@ void TupleAggregateStep::prep1PhaseAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, i)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -1474,8 +1470,6 @@ void TupleAggregateStep::prep1PhaseAggregate( throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } - pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); - // Return column oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(key); @@ -1677,8 +1671,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); // map key = column key, operation (enum), and UDAF pointer if UDAF. AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; set avgSet; + + // fOR udaf + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; uint32_t projColsUDAFIndex = 0; // for count column of average function @@ -1847,7 +1844,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; if (udafc) @@ -1857,12 +1854,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAgg)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -2142,6 +2137,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // locate the return 
column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { + udafc = NULL; pUDAFFunc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); @@ -2150,10 +2146,21 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -2473,26 +2480,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. 
- funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); } else { @@ -2904,7 +2892,10 @@ void TupleAggregateStep::prep2PhasesAggregate( vector > aggColVec; set avgSet; vector >& returnedColVec = jobInfo.returnedColVec; + // For UDAF uint32_t projColsUDAFIndex = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2947,7 +2938,6 @@ void TupleAggregateStep::prep2PhasesAggregate( uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns @@ -3084,12 +3074,10 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3098,10 +3086,9 @@ void TupleAggregateStep::prep2PhasesAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -3350,10 +3337,6 @@ void TupleAggregateStep::prep2PhasesAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { - // Keep a count of the parms after the first for 
any aggregate. - // These will be skipped and the count needs to be subtracted - // from where the aux column will be. - int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap; @@ -3369,6 +3352,8 @@ void TupleAggregateStep::prep2PhasesAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { uint32_t retKey = returnedColVec[i].first; @@ -3379,19 +3364,30 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_MULTI_PARM) { // Skip on UM: Extra parms for an aggregate have no work on the UM - ++multiParms; continue; } + // Is this a UDAF? use the function as part of the key. - - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; - + pUDAFFunc = NULL; + udafc = NULL; if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); @@ -3492,7 +3488,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep2PhasesAggregate: " << emsg << " oid=" @@ -3514,7 +3510,7 @@ void 
TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3525,7 +3521,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.distinctColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3534,7 +3530,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -3542,30 +3538,11 @@ void TupleAggregateStep::prep2PhasesAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. 
- funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3600,6 +3577,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnedColVec[i].second == AggregateColumn::AVG) avgFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // now fix the AVG function, locate the count(column) position @@ -3617,7 +3595,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size() - multiParms; + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3724,7 +3702,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector > aggColVec, aggNoDistColVec; set avgSet, avgDistSet; vector >& returnedColVec = jobInfo.returnedColVec; + // For UDAF uint32_t projColsUDAFIndex = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -3796,7 +3777,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns @@ -3940,12 +3920,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - 
UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3954,10 +3932,9 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -4201,32 +4178,33 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // associate the columns between the aggregate RGs on PM and UM without distinct aggregator // populated the returned columns { + int64_t multiParms = 0; + for (uint32_t idx = 0; idx < groupByPm.size(); idx++) { SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(idx, idx)); groupByUm.push_back(groupby); } - // Keep a count of the parms after the first for any aggregate. - // These will be skipped and the count needs to be subtracted - // from where the aux column will be. - int64_t multiParms = 0; for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) - { SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; - // UDAF support if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) { - // Multi-Parm is not used on the UM + // Skip on UM: Extra parms for an aggregate have no work on the UM ++multiParms; continue; } + if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); + if (!udafFuncCol) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + } funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, @@ -4273,6 +4251,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // These will be skipped and the count needs to be subtracted // from where the aux column will be. 
int64_t multiParms = 0; + projColsUDAFIndex = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4286,9 +4265,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { pUDAFFunc = NULL; + udafc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); @@ -4304,10 +4286,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -4436,7 +4429,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second - multiParms; + colUm = it->second; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); @@ -4460,7 +4453,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - colUm = it->second - multiParms; + colUm = it->second; if (aggOp == ROWAGG_SUM) { @@ -4528,7 +4521,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (returnColMissing) { 
Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep2PhasesDistinctAggregate: " << emsg << " oid=" @@ -4552,7 +4545,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -4561,7 +4554,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -4569,30 +4562,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. 
- funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4629,6 +4603,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -4646,7 +4621,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size() - multiParms; + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4706,7 +4681,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(5)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 86dc0bd2f..ec08223b0 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4570,7 +4570,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) udafc->resultType(ct); } } - } catch (std::logic_error e) { diff --git a/utils/common/common.vpj b/utils/common/common.vpj index 69059884c..ea67e04ba 100755 --- a/utils/common/common.vpj +++ 
b/utils/common/common.vpj @@ -200,6 +200,7 @@ + @@ -208,6 +209,7 @@ Name="Header Files" Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index c1f5bbd63..043dcaac2 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -2015,13 +2015,13 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { - int32_t paramCount = fRGContext.getParameterCount(); + uint32_t paramCount = fRGContext.getParameterCount(); // The vector of parameters to be sent to the UDAF mcsv1sdk::ColumnDatum valsIn[paramCount]; uint32_t dataFlags[paramCount]; execplan::CalpontSystemCatalog::ColDataType colDataType; - for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + for (uint32_t i = 0; i < paramCount; ++i) { mcsv1sdk::ColumnDatum& datum = valsIn[i]; // Turn on NULL flags From 1c7ec0ddc6fe8f74c44cee1bc773f59e76cc37c8 Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 15 May 2018 13:15:45 -0500 Subject: [PATCH 008/123] MCOL-1201 Modify docs. 
Fix group concat bug --- dbcon/mysql/ha_calpont_execplan.cpp | 1 + utils/udfsdk/docs/source/changelog.rst | 1 + .../docs/source/reference/ColumnDatum.rst | 6 ++-- .../docs/source/reference/MariaDBUDAF.rst | 2 +- .../udfsdk/docs/source/reference/UDAFMap.rst | 2 +- .../docs/source/reference/mcsv1Context.rst | 2 +- .../docs/source/reference/mcsv1_UDAF.rst | 36 ++++++++----------- utils/udfsdk/docs/source/usage/cmakelists.rst | 2 +- utils/udfsdk/docs/source/usage/compile.rst | 2 +- utils/udfsdk/docs/source/usage/headerfile.rst | 6 ++-- .../udfsdk/docs/source/usage/introduction.rst | 4 +-- utils/udfsdk/docs/source/usage/sourcefile.rst | 29 +++++++-------- utils/udfsdk/udfsdk.vpj | 33 +++++++++++++++++ 13 files changed, 75 insertions(+), 51 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index ec08223b0..701e1c14f 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4162,6 +4162,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) rowCol->columnVec(selCols); (dynamic_cast(ac))->orderCols(orderCols); parm.reset(rowCol); + ac->aggParms().push_back(parm); if (gc->str_separator()) { diff --git a/utils/udfsdk/docs/source/changelog.rst b/utils/udfsdk/docs/source/changelog.rst index fcd93d54c..1a7c749f9 100644 --- a/utils/udfsdk/docs/source/changelog.rst +++ b/utils/udfsdk/docs/source/changelog.rst @@ -5,4 +5,5 @@ Version History | Version | Date | Changes | +=========+============+=============================+ | 1.1.0α | 2017-08-25 | - First alpha release | +| 1.2.0α | 2016-05-18 | - Add multi parm support | +---------+------------+-----------------------------+ diff --git a/utils/udfsdk/docs/source/reference/ColumnDatum.rst b/utils/udfsdk/docs/source/reference/ColumnDatum.rst index dd1006363..5304a2953 100644 --- a/utils/udfsdk/docs/source/reference/ColumnDatum.rst +++ b/utils/udfsdk/docs/source/reference/ColumnDatum.rst @@ -1,3 +1,5 @@ +.. 
_ColumnDatum: + ColumnDatum =========== @@ -13,7 +15,7 @@ Example for int data: int myint = valIn.cast(); -For multi-paramter aggregations (not available in Columnstore 1.1), the colsIn vector of next_value() contains the ordered set of row parameters. +For multi-paramter aggregations (not available in Columnstore 1.1), the colsIn array of next_value() contains the ordered set of row parameters. For char, varchar, text, varbinary and blob types, columnData will be std::string. @@ -59,7 +61,7 @@ The provided values are: * - SMALLINT - A signed two byte integer * - DECIMAL - - A Columnstore Decimal value. For Columnstore 1.1, this is stored in the smallest integer type field that will hold the required precision. + - A Columnstore Decimal value. This is stored in the smallest integer type field that will hold the required precision. * - MEDINT - A signed four byte integer * - INT diff --git a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst index 1f6fa7acb..d031705d8 100644 --- a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst +++ b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst @@ -13,7 +13,7 @@ The library placed in mysql/lib is the name you use in the SQL CREATE AGGREGATE CREATE AGGREGATE FUNCTION ssq returns REAL soname 'libudf_mysql.so'; -Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures. If writing your function for other engines, you must handle allocation and de-alloaction in :ref:`function_init ` and :ref:`function_deinit ` +Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures in other engines. 
If writing your function for other engines, you must handle allocation and de-allocation in :ref:`function_init ` and :ref:`function_deinit ` All of the MariaDB UDF and UDAF example functions are in a single source file named udfmysql.cpp and linked into libudf_mysql.so. diff --git a/utils/udfsdk/docs/source/reference/UDAFMap.rst b/utils/udfsdk/docs/source/reference/UDAFMap.rst index 48706bab3..d3cda63f4 100644 --- a/utils/udfsdk/docs/source/reference/UDAFMap.rst +++ b/utils/udfsdk/docs/source/reference/UDAFMap.rst @@ -3,7 +3,7 @@ UDAFMap ======= -The UDAFMap is where we tell the system about our function. For Columnstore 1.1, you must manually place your function into this map. +The UDAFMap is where we tell the system about our function. For Columnstore 1.2, you must manually place your function into this map. * open mcsv1_udaf.cpp * add your header to the #include list diff --git a/utils/udfsdk/docs/source/reference/mcsv1Context.rst b/utils/udfsdk/docs/source/reference/mcsv1Context.rst index 279220fb3..02adf57ab 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1Context.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1Context.rst @@ -150,7 +150,7 @@ Use these to determine the way your UDA(n)F was called .. c:function:: size_t getParameterCount() const; -:returns: the number of parameters to the function in the SQL query. Columnstore 1.1 only supports one parameter. +:returns: the number of parameters to the function in the SQL query. .. c:function:: bool isParamNull(int paramIdx); diff --git a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst index 73c8f6570..f75fe73fc 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst @@ -1,4 +1,4 @@ -.. _ mcsv1_udaf: +.. _mcsv1_udaf: mcsv1_UDAF ========== @@ -11,12 +11,14 @@ The base class has no data members. It is designed to be only a container for yo However, adding static const members makes sense. 
-For UDAF (not Wndow Functions) Aggregation takes place in three stages: +For UDAF (not Window Functions) Aggregation takes place in three stages: * Subaggregation on the PM. nextValue() * Consolodation on the UM. subevaluate() * Evaluation of the function on the UM. evaluate() +There are situations where the system makes a choice to perform all UDAF calculations on the UM. The presence of group_concat() in the query and certain joins can cause the optimizer to make this choice. + For Window Functions, all aggregation occurs on the UM, and thus the subevaluate step is skipped. There is an optional dropValue() function that may be added. * Aggregation on the UM. nextValue() @@ -80,17 +82,11 @@ Callback Methods .. _init: -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. - - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. +:param colTypes: A list of ColumnDatum structures. Use this to access the column types of the parameters. colTypes.columnData will be invalid. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -116,25 +112,23 @@ Callback Methods .. _nextvalue: -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. - +:param valsIn: an array representing the values to be added for each parameter for this row. 
+ :returns: ReturnCode::ERROR or ReturnCode::SUCCESS Use context->getUserData() and type cast it to your UserData type or Simple Data Model stuct. nextValue() is called for each Window movement that passes the WHERE and HAVING clauses. The context's UserData will contain values that have been sub-aggregated to this point for the group, partition or Window Frame. nextValue is called on the PM for aggregation and on the UM for Window Functions. - When used in an aggregate, the function may not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. + When used in an aggregate, the function should not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. - When used as a analytic function (Window Function), nextValue is call for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. + When used as an analytic function (Window Function), nextValue is called for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. - Since this is called for every row, it is important that this method be efficient. + Since this may be called for every row, it is important that this method be efficient. .. _subevaluate: @@ -172,13 +166,11 @@ Callback Methods .. _dropvalue: -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call -:param valsDropped: a vector representing the values to be dropped for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. 
+:param valsDropped: an array representing the values to be dropped for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS diff --git a/utils/udfsdk/docs/source/usage/cmakelists.rst b/utils/udfsdk/docs/source/usage/cmakelists.rst index 32a218459..a7ddacbaf 100644 --- a/utils/udfsdk/docs/source/usage/cmakelists.rst +++ b/utils/udfsdk/docs/source/usage/cmakelists.rst @@ -3,7 +3,7 @@ CMakeLists.txt ============== -For Columnstore 1.1, you compile your function by including it in the CMakeLists.txt file for the udfsdk. +For Columnstore 1.2, you compile your function by including it in the CMakeLists.txt file for the udfsdk. You need only add the new .cpp files to the udfsdk_LIB_SRCS target list:: diff --git a/utils/udfsdk/docs/source/usage/compile.rst b/utils/udfsdk/docs/source/usage/compile.rst index e6319e45b..b96af5d80 100644 --- a/utils/udfsdk/docs/source/usage/compile.rst +++ b/utils/udfsdk/docs/source/usage/compile.rst @@ -3,7 +3,7 @@ Compile ======= -To compile your function for Columnstore 1.1, simple recompile the udfsdk directory:: +To compile your function for Columnstore 1.2, simply recompile the udfsdk directory:: cd utils/usdsdk cmake . diff --git a/utils/udfsdk/docs/source/usage/headerfile.rst b/utils/udfsdk/docs/source/usage/headerfile.rst index 720acc5be..afb043e98 100644 --- a/utils/udfsdk/docs/source/usage/headerfile.rst +++ b/utils/udfsdk/docs/source/usage/headerfile.rst @@ -5,7 +5,7 @@ Header file Usually, each UDA(n)F function will have one .h and one .cpp file plus code for the mariadb UDAF plugin which may or may not be in a separate file. It is acceptable to put a set of related functions in the same files or use separate files for each. -The easiest way to create these files is to copy them an example closest to the type of function you intend to create. +The easiest way to create these files is to copy them from an example closest to the type of function you intend to create. 
Your header file must have a class defined that will implement your function. This class must be derived from mcsv1_UDAF and be in the mcsv1sdk namespace. The following examples use the "allnull" UDAF. @@ -29,9 +29,9 @@ allnull uses the Simple Data Model. See :ref:`complexdatamodel` to see how that allnull() : mcsv1_UDAF(){}; virtual ~allnull(){}; - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); virtual ReturnCode reset(mcsv1Context* context); - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn); virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); }; diff --git a/utils/udfsdk/docs/source/usage/introduction.rst b/utils/udfsdk/docs/source/usage/introduction.rst index 6b3544a1e..19c612caa 100644 --- a/utils/udfsdk/docs/source/usage/introduction.rst +++ b/utils/udfsdk/docs/source/usage/introduction.rst @@ -3,7 +3,7 @@ mcsv1_udaf Introduction mcsv1_udaf is a C++ API for writing User Defined Aggregate Functions (UDAF) and User Defined Analytic Functions (UDAnF) for the MariaDB Columstore engine. -In Columnstore 1.1.0, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. +In Columnstore 1.2, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. The API has a number of features. The general theme is, there is a class that represents the function, there is a context under which the function operates, and there is a data store for intermediate values. @@ -18,5 +18,5 @@ The steps required to create a function are: * :ref:`Compile udfsdk `. * :ref:`Copy the compiled libraries ` to the working directories. 
-In 1.1.0, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. +In 1.2, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. diff --git a/utils/udfsdk/docs/source/usage/sourcefile.rst b/utils/udfsdk/docs/source/usage/sourcefile.rst index b7ed38a32..5c43f29e4 100644 --- a/utils/udfsdk/docs/source/usage/sourcefile.rst +++ b/utils/udfsdk/docs/source/usage/sourcefile.rst @@ -34,21 +34,17 @@ Or, if using the :ref:`complexdatamodel`, type cast the UserData to your UserDat init() ------ -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. +:param colTypes: A list of the ColumnDatum used to access column types of the parameters. In init(), the columnData member is invalid. - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - see :ref:`ColDataTypes `. In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. + see :ref:`ColumnDatum`. In Columnstore 1.2, An arbitrary number of parameters is supported. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS -The init() method is where you sanity check the input, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. +The init() method is where you sanity check the input datatypes, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. init() is the exception to type casting the UserData member of context. 
UserData has not been created when init() is called, so you shouldn't use it here. @@ -60,13 +56,14 @@ If you're using :ref:`simpledatamodel`, you need to set the size of the structur .. rubric:: Check parameter count and type -Each function expects a certain number of columns to entered as parameters in the SQL query. For columnstore 1.1, the number of parameters is limited to one. +Each function expects a certain number of columns to be entered as parameters in the SQL query. It is possible to create a UDAF that accepts a variable number of parameters. You can discover which ones were actually used in init(), and modify your function's behavior accordingly. -colTypes is a vector of each parameter name and type. The name is the colum name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your functions behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +colTypes is an array of ColumnData from which can be gleaned the type and name. The name is the column name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your function's behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +The actual number of parameters passed can be gotten from context->getParameterCount(). :: - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -84,7 +81,7 @@ When you create your function using the SQL CREATE FUNCTION command, you must in .. 
rubric:: Set width and scale -If you have secial requirements, especially if you might be dealing with decimal types:: +If you have special requirements, especially if you might be dealing with decimal types:: context->setColWidth(8); context->setScale(context->getScale()*2); @@ -117,13 +114,11 @@ This function may be called multiple times from both the UM and the PM. Make no nextValue() ----------- -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. +:param valsIn: an array representing the values to be added for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -208,7 +203,7 @@ For AVG, you might see:: dropValue --------- -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index fe1f3fd0e..3d3ac39ca 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -238,5 +238,38 @@ N="Makefile" Type="Makefile"/> + + + + + + + + + + + + + + + + + + + + + + + + + + + From 59858aa8962ff857df829b0b65dfc6016eeab8a7 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Thu, 17 May 2018 10:01:17 +0300 Subject: [PATCH 009/123] MCOL-1415 Fixed regression with extra spaces after dot in qualified identifiers. 
--- dbcon/ddlpackage/ddl.y | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index 982167287..398a8612f 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -631,6 +631,10 @@ qualified_name: else $$ = new QualifiedName($1); } + | IDENT '.' IDENT + { + $$ = new QualifiedName($1, $3); + } ; ata_add_column: From 51df837b4ea89acaac48b87cdebbebb27e5d70eb Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:50:10 -0500 Subject: [PATCH 010/123] MCOL-1201 manual rebase with develop. Obsoletes branch MCOL-1201 --- dbcon/execplan/aggregatecolumn.cpp | 96 +-- dbcon/execplan/aggregatecolumn.h | 44 +- dbcon/joblist/expressionstep.cpp | 12 +- dbcon/joblist/expressionstep.h | 1 + dbcon/joblist/groupconcat.cpp | 2 +- dbcon/joblist/joblistfactory.cpp | 531 ++++++++---- dbcon/joblist/tupleaggregatestep.cpp | 280 +++++-- dbcon/mysql/ha_calpont_execplan.cpp | 858 +++++++++++--------- dbcon/mysql/ha_calpont_impl.cpp | 9 +- dbcon/mysql/ha_window_function.cpp | 37 +- utils/common/any.hpp | 270 +++--- utils/rowgroup/rowaggregation.cpp | 605 +++++++++----- utils/rowgroup/rowaggregation.h | 29 +- utils/udfsdk/CMakeLists.txt | 2 +- utils/udfsdk/allnull.cpp | 7 +- utils/udfsdk/allnull.h | 4 +- utils/udfsdk/avg_mode.cpp | 14 +- utils/udfsdk/avg_mode.h | 14 +- utils/udfsdk/mcsv1_udaf.cpp | 13 +- utils/udfsdk/mcsv1_udaf.h | 88 +- utils/udfsdk/median.cpp | 14 +- utils/udfsdk/median.h | 8 +- utils/udfsdk/ssq.cpp | 14 +- utils/udfsdk/ssq.h | 8 +- utils/udfsdk/udfmysql.cpp | 162 ++++ utils/udfsdk/udfsdk.vpj | 4 + utils/windowfunction/wf_udaf.cpp | 280 +++++-- utils/windowfunction/wf_udaf.h | 27 +- utils/windowfunction/windowfunctiontype.cpp | 8 +- writeengine/wrapper/writeengine.cpp | 10 +- 30 files changed, 2255 insertions(+), 1196 deletions(-) diff --git a/dbcon/execplan/aggregatecolumn.cpp b/dbcon/execplan/aggregatecolumn.cpp index 18cba2607..5bce12d79 100644 --- a/dbcon/execplan/aggregatecolumn.cpp +++ 
b/dbcon/execplan/aggregatecolumn.cpp @@ -98,36 +98,6 @@ AggregateColumn::AggregateColumn(const uint32_t sessionID): { } -AggregateColumn::AggregateColumn(const AggOp aggOp, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - -AggregateColumn::AggregateColumn(const AggOp aggOp, const string& content, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + content + ")") -{ - // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); -} - -// deprecated constructor. use function name as string -AggregateColumn::AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fFunctionName(functionName), - fAggOp(NOOP), - fAsc(false), - fData(functionName + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - // deprecated constructor. 
use function name as string AggregateColumn::AggregateColumn(const string& functionName, const string& content, const uint32_t sessionID): ReturnedColumn(sessionID), @@ -137,20 +107,21 @@ AggregateColumn::AggregateColumn(const string& functionName, const string& conte fData(functionName + "(" + content + ")") { // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); + SRCP srcp(new ArithmeticColumn(content)); + fAggParms.push_back(srcp); } AggregateColumn::AggregateColumn( const AggregateColumn& rhs, const uint32_t sessionID ): ReturnedColumn(rhs, sessionID), fFunctionName (rhs.fFunctionName), fAggOp(rhs.fAggOp), - fFunctionParms(rhs.fFunctionParms), fTableAlias(rhs.tableAlias()), fAsc(rhs.asc()), fData(rhs.data()), fConstCol(rhs.fConstCol) { fAlias = rhs.alias(); + fAggParms = rhs.fAggParms; } /** @@ -166,10 +137,14 @@ const string AggregateColumn::toString() const if (fAlias.length() > 0) output << "/Alias: " << fAlias << endl; - if (fFunctionParms == 0) - output << "No arguments" << endl; + if (fAggParms.size() == 0) + output << "No arguments"; else - output << *fFunctionParms << endl; + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + output << *(fAggParms[i]) << " "; + } + output << endl; if (fConstCol) output << *fConstCol; @@ -191,10 +166,11 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const b << fFunctionName; b << static_cast(fAggOp); - if (fFunctionParms == 0) - b << (uint8_t) ObjectReader::NULL_CLASS; - else - fFunctionParms->serialize(b); + b << static_cast(fAggParms.size()); + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + fAggParms[i]->serialize(b); + } b << static_cast(fGroupByColList.size()); @@ -219,20 +195,26 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const void AggregateColumn::unserialize(messageqcpp::ByteStream& b) { - ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); - fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); - 
fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); - ReturnedColumn::unserialize(b); - b >> fFunctionName; - b >> fAggOp; - //delete fFunctionParms; - fFunctionParms.reset( - dynamic_cast(ObjectReader::createTreeNode(b))); - messageqcpp::ByteStream::quadbyte size; messageqcpp::ByteStream::quadbyte i; ReturnedColumn* rc; + ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); + fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); + fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); + fAggParms.erase(fAggParms.begin(), fAggParms.end()); + ReturnedColumn::unserialize(b); + b >> fFunctionName; + b >> fAggOp; + + b >> size; + for (i = 0; i < size; i++) + { + rc = dynamic_cast(ObjectReader::createTreeNode(b)); + SRCP srcp(rc); + fAggParms.push_back(srcp); + } + b >> size; for (i = 0; i < size; i++) @@ -261,6 +243,7 @@ void AggregateColumn::unserialize(messageqcpp::ByteStream& b) bool AggregateColumn::operator==(const AggregateColumn& t) const { const ReturnedColumn* rc1, *rc2; + AggParms::const_iterator it, it2; rc1 = static_cast(this); rc2 = static_cast(&t); @@ -277,16 +260,18 @@ bool AggregateColumn::operator==(const AggregateColumn& t) const if (fAggOp != t.fAggOp) return false; - if (fFunctionParms.get() != NULL && t.fFunctionParms.get() != NULL) + if (aggParms().size() != t.aggParms().size()) { - if (*fFunctionParms.get() != t.fFunctionParms.get()) + return false; + } + for (it = fAggParms.begin(), it2 = t.fAggParms.begin(); + it != fAggParms.end(); + ++it, ++it2) + { + if (**it != **it2) return false; } - else if (fFunctionParms.get() != NULL || t.fFunctionParms.get() != NULL) - return false; - //if (fAlias != t.fAlias) - // return false; if (fTableAlias != t.fTableAlias) return false; @@ -645,3 +630,4 @@ AggregateColumn::AggOp AggregateColumn::agname2num(const string& agname) } } // namespace execplan + diff --git a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h index 
d1db7e5a4..b0884f179 100644 --- a/dbcon/execplan/aggregatecolumn.h +++ b/dbcon/execplan/aggregatecolumn.h @@ -40,6 +40,8 @@ class ByteStream; namespace execplan { +typedef std::vector AggParms; + /** * @brief A class to represent a aggregate return column * @@ -74,7 +76,8 @@ public: BIT_OR, BIT_XOR, GROUP_CONCAT, - UDAF + UDAF, + MULTI_PARM }; /** @@ -94,21 +97,6 @@ public: */ AggregateColumn(const uint32_t sessionID); - /** - * ctor - */ - AggregateColumn(const AggOp aggop, ReturnedColumn* parm, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const AggOp aggop, const std::string& content, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID = 0); - /** * ctor */ @@ -155,24 +143,27 @@ public: fAggOp = aggOp; } + /** get function parms - * - * set the function parms from this object */ - virtual const SRCP functionParms() const + virtual AggParms& aggParms() { - return fFunctionParms; + return fAggParms; + } + + virtual const AggParms& aggParms() const + { + return fAggParms; } /** set function parms - * - * set the function parms for this object */ - virtual void functionParms(const SRCP& functionParms) + virtual void aggParms(const AggParms& parms) { - fFunctionParms = functionParms; + fAggParms = parms; } + /** return a copy of this pointer * * deep copy of this pointer and return the copy @@ -325,9 +316,10 @@ protected: uint8_t fAggOp; /** - * A ReturnedColumn objects that are the arguments to this function + * ReturnedColumn objects that are the arguments to this + * function */ - SRCP fFunctionParms; + AggParms fAggParms; /** table alias * A string to represent table alias name which contains this column diff --git a/dbcon/joblist/expressionstep.cpp b/dbcon/joblist/expressionstep.cpp index 0e064c359..4a8a14ff3 100644 --- a/dbcon/joblist/expressionstep.cpp +++ b/dbcon/joblist/expressionstep.cpp @@ -56,6 +56,17 @@ using namespace rowgroup; 
namespace joblist { +ExpressionStep::ExpressionStep() : + fExpressionFilter(NULL), + fExpressionId(-1), + fVarBinOK(false), + fSelectFilter(false), + fAssociatedJoinId(0), + fDoJoin(false), + fVirtual(false) +{ +} + ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : JobStep(jobInfo), fExpressionFilter(NULL), @@ -68,7 +79,6 @@ ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : { } - ExpressionStep::ExpressionStep(const ExpressionStep& rhs) : JobStep(rhs), fExpression(rhs.expression()), diff --git a/dbcon/joblist/expressionstep.h b/dbcon/joblist/expressionstep.h index 4a069440f..63423fc7d 100644 --- a/dbcon/joblist/expressionstep.h +++ b/dbcon/joblist/expressionstep.h @@ -50,6 +50,7 @@ class ExpressionStep : public JobStep { public: // constructors + ExpressionStep(); ExpressionStep(const JobInfo&); // destructor constructors virtual ~ExpressionStep(); diff --git a/dbcon/joblist/groupconcat.cpp b/dbcon/joblist/groupconcat.cpp index 234fc0a8e..afc91a2ec 100644 --- a/dbcon/joblist/groupconcat.cpp +++ b/dbcon/joblist/groupconcat.cpp @@ -78,7 +78,7 @@ void GroupConcatInfo::prepGroupConcat(JobInfo& jobInfo) while (i != jobInfo.groupConcatCols.end()) { GroupConcatColumn* gcc = dynamic_cast(i->get()); - const RowColumn* rcp = dynamic_cast(gcc->functionParms().get()); + const RowColumn* rcp = dynamic_cast(gcc->aggParms()[0].get()); SP_GroupConcat groupConcat(new GroupConcat); groupConcat->fSeparator = gcc->separator(); diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index a48ecd13a..4cf7bccc5 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -18,7 +18,6 @@ // $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $ - #include #include #include @@ -870,7 +869,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo if (gcc != NULL) { - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rcp = dynamic_cast(srcp.get()); const vector& cols = 
rcp->columnVec(); @@ -891,21 +890,55 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } +#if 0 + // MCOL-1201 Add support for multi-parameter UDAnF + UDAFColumn* udafc = dynamic_cast(retCols[i].get()); + if (udafc != NULL) + { + srcp = udafc->aggParms()[0]; + const RowColumn* rcp = dynamic_cast(srcp.get()); + const vector& cols = rcp->columnVec(); + for (vector::const_iterator j = cols.begin(); j != cols.end(); j++) + { + srcp = *j; + if (dynamic_cast(srcp.get()) == NULL) + retCols.push_back(srcp); + + // Do we need this? + const ArithmeticColumn* ac = dynamic_cast(srcp.get()); + const FunctionColumn* fc = dynamic_cast(srcp.get()); + if (ac != NULL || fc != NULL) + { + // bug 3728, make a dummy expression step for each expression. + scoped_ptr es(new ExpressionStep(jobInfo)); + es->expression(srcp, jobInfo); + } + } + continue; + } +#endif srcp = retCols[i]; const AggregateColumn* ag = dynamic_cast(retCols[i].get()); - - if (ag != NULL) - srcp = ag->functionParms(); - - const ArithmeticColumn* ac = dynamic_cast(srcp.get()); - const FunctionColumn* fc = dynamic_cast(srcp.get()); - - if (ac != NULL || fc != NULL) + // bug 3728 Make a dummy expression for srcp if it is an + // expression. This is needed to fill in some stuff. + // Note that es.expression does nothing if the item is not an expression. + if (ag == NULL) { - // bug 3728, make a dummy expression step for each expression. - scoped_ptr es(new ExpressionStep(jobInfo)); - es->expression(srcp, jobInfo); + // Not an aggregate. Make a dummy expression for the item + ExpressionStep es; + es.expression(srcp, jobInfo); + } + else + { + // MCOL-1201 multi-argument aggregate. make a dummy expression + // step for each argument that is an expression. 
+ for (uint32_t i = 0; i < ag->aggParms().size(); ++i) + { + srcp = ag->aggParms()[i]; + ExpressionStep es; + es.expression(srcp, jobInfo); + } } } @@ -915,17 +948,18 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo { srcp = retCols[i]; const SimpleColumn* sc = dynamic_cast(srcp.get()); + AggregateColumn* aggc = dynamic_cast(srcp.get()); bool doDistinct = (csep->distinct() && csep->groupByCols().empty()); uint32_t tupleKey = -1; string alias; string view; - // returned column could be groupby column, a simplecoulumn not a agregatecolumn + // returned column could be groupby column, a simplecoulumn not an aggregatecolumn int op = 0; CalpontSystemCatalog::OID dictOid = 0; CalpontSystemCatalog::ColType ct, aggCt; - if (sc == NULL) + if (aggc) { GroupConcatColumn* gcc = dynamic_cast(retCols[i].get()); @@ -939,7 +973,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo tupleKey = ti.key; jobInfo.returnedColVec.push_back(make_pair(tupleKey, gcc->aggOp())); // not a tokenOnly column. 
Mark all the columns involved - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rowCol = dynamic_cast(srcp.get()); if (rowCol) @@ -963,186 +997,353 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } - - AggregateColumn* ac = dynamic_cast(retCols[i].get()); - - if (ac != NULL) + else { - srcp = ac->functionParms(); - sc = dynamic_cast(srcp.get()); + // Aggregate column not group concat + AggParms& aggParms = aggc->aggParms(); - if (ac->constCol().get() != NULL) + for (uint32_t parm = 0; parm < aggParms.size(); ++parm) { - // replace the aggregate on constant with a count(*) - SRCP clone; - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) + if (aggc->constCol().get() != NULL) { - clone.reset(new UDAFColumn(*udafc, ac->sessionID())); + // replace the aggregate on constant with a count(*) + SRCP clone; + UDAFColumn* udafc = dynamic_cast(aggc); + + if (udafc) + { + clone.reset(new UDAFColumn(*udafc, aggc->sessionID())); + } + else + { + clone.reset(new AggregateColumn(*aggc, aggc->sessionID())); + } + + jobInfo.constAggregate.insert(make_pair(i, clone)); + aggc->aggOp(AggregateColumn::COUNT_ASTERISK); + aggc->distinct(false); + } + + srcp = aggParms[parm]; + sc = dynamic_cast(srcp.get()); + if (parm == 0) + { + op = aggc->aggOp(); } else { - clone.reset(new AggregateColumn(*ac, ac->sessionID())); + op = AggregateColumn::MULTI_PARM; + } + doDistinct = aggc->distinct(); + if (aggParms.size() == 1) + { + // Set the col type based on the single parm. + // Changing col type based on a parm if multiple parms + // doesn't really make sense. + updateAggregateColType(aggc, srcp, op, jobInfo); + } + aggCt = aggc->resultType(); + + // As of bug3695, make sure varbinary is not used in aggregation. 
+ // TODO: allow for UDAF + if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) + throw runtime_error ("VARBINARY in aggregate function is not supported."); + + // Project the parm columns or expressions + if (sc != NULL) + { + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. + if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. 
+ else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } + } + else + { + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - jobInfo.constAggregate.insert(make_pair(i, clone)); - ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ac->distinct(false); - } + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - op = ac->aggOp(); - doDistinct = ac->distinct(); - updateAggregateColType(ac, srcp, op, jobInfo); - aggCt = ac->resultType(); + if (keyIt == projectKeys.end()) + { + 
RetColsVector::iterator it = pcv.end(); - // As of bug3695, make sure varbinary is not used in aggregation. - if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) - throw runtime_error ("VARBINARY in aggregate function is not supported."); - } - } + if (doDistinct) + it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // simple column selected or aggregated - if (sc != NULL) - { - // one column only need project once - CalpontSystemCatalog::OID retOid = sc->oid(); - CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); - alias = extractTableAlias(sc); - view = sc->viewName(); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); - if (!sc->schemaName().empty()) - { - ct = sc->colType(); + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } -//XXX use this before connector sets colType in sc correctly. 
- if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) - ct = jobInfo.csc->colType(sc->oid()); + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; -//X - dictOid = isDictCol(ct); - } - else - { - retOid = (tblOid + 1) + sc->colPosition(); - ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; - } + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); - TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); - tupleKey = ti.key; + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; - // this is a string column - if (dictOid > 0) - { - map::iterator findit = jobInfo.tokenOnly.find(tupleKey); - - // if the column has never seen, and the op is count: possible need count only. - if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) - { - if (findit == jobInfo.tokenOnly.end()) - jobInfo.tokenOnly[tupleKey] = true; - } - // if aggregate other than count, token is not enough. 
- else if (op != 0 || doDistinct) - { - jobInfo.tokenOnly[tupleKey] = false; - } - - findit = jobInfo.tokenOnly.find(tupleKey); - - if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) - { - dictMap[tupleKey] = dictOid; - jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; - ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); - jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } } else { - const ArithmeticColumn* ac = NULL; - const FunctionColumn* fc = NULL; - const WindowFunctionColumn* wc = NULL; - bool hasAggCols = false; - - if ((ac = dynamic_cast(srcp.get())) != NULL) + // Not an Aggregate + // simple column selected + if (sc != NULL) { - if (ac->aggColumnList().size() > 0) - hasAggCols = true; + // one column only need project once + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. 
+ if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. + else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } } - else if ((fc = dynamic_cast(srcp.get())) != NULL) - { - if (fc->aggColumnList().size() > 0) - hasAggCols = true; - } - else if (dynamic_cast(srcp.get()) != NULL) - { - std::ostringstream errmsg; - errmsg << "Invalid aggregate function nesting."; - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - else if ((wc = dynamic_cast(srcp.get())) == NULL) - { - std::ostringstream errmsg; - errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - - uint64_t eid = srcp.get()->expressionId(); - ct = srcp.get()->resultType(); - TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); - tupleKey = ti.key; - - if (hasAggCols) - jobInfo.expressionVec.push_back(tupleKey); - } - - // add to project list - vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - - if (keyIt == projectKeys.end()) - { - RetColsVector::iterator it = pcv.end(); - - if (doDistinct) - it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); else - it = pcv.insert(pcv.end(), srcp); - - projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); - } - else if (doDistinct) // @bug4250, move forward distinct column if necessary. 
- { - uint32_t pos = distance(projectKeys.begin(), keyIt); - - if (pos >= lastGroupByPos) { - pcv[pos] = pcv[lastGroupByPos]; - pcv[lastGroupByPos] = srcp; - projectKeys[pos] = projectKeys[lastGroupByPos]; - projectKeys[lastGroupByPos] = tupleKey; - lastGroupByPos++; + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - } - if (doDistinct && dictOid > 0) - tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - // remember the columns to be returned - jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + if (keyIt == projectKeys.end()) + { + RetColsVector::iterator it = pcv.end(); - if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) - jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + if (doDistinct) + it = pcv.insert(pcv.begin() + 
lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // bug 1499 distinct processing, save unique distinct columns - if (doDistinct && - (jobInfo.distinctColVec.end() == - find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) - { - jobInfo.distinctColVec.push_back(tupleKey); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); + + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } + + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 9e23ac17b..ff490da5b 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -164,6 +164,9 @@ inline RowAggFunctionType functionIdMap(int planFuncId) case AggregateColumn::UDAF: return ROWAGG_UDAF; + case AggregateColumn::MULTI_PARM: + return ROWAGG_MULTI_PARM; + default: return ROWAGG_FUNCT_UNDEFINE; } @@ -1302,7 +1305,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no/not enough 
UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -1468,7 +1471,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); @@ -1483,6 +1486,17 @@ void TupleAggregateStep::prep1PhaseAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(key); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(width[colProj]); + } + break; + default: { ostringstream emsg; @@ -1560,7 +1574,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(3)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec[i]->fAuxColumnIndex = lastCol++; @@ -1675,7 +1689,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation map projColPosMap; @@ -1848,7 +1862,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); 
} } else @@ -2043,7 +2057,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -2065,6 +2079,18 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(aggKey); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(widthProj[colProj]); + ++colAgg; + } + break; + default: { ostringstream emsg; @@ -2111,7 +2137,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( groupByNoDist.push_back(groupby); aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - + + projColsUDAFIndex = 0; // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2121,6 +2148,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colAgg = -1; + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -2432,11 +2467,37 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + // update the aggregate function vector + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = 
jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + } if (aggOp == ROWAGG_COUNT_NO_OP) funct->fAuxColumnIndex = colAgg; @@ -2549,7 +2610,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep1PhaseDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec2[i]->fAuxColumnIndex = lastCol++; @@ -2893,7 +2954,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3036,12 +3097,11 @@ void TupleAggregateStep::prep2PhasesAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); break; } - } if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -3240,7 +3300,7 @@ void 
TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } oidsAggPm.push_back(oidsProj[colProj]); @@ -3261,6 +3321,18 @@ void TupleAggregateStep::prep2PhasesAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -3278,11 +3350,16 @@ void TupleAggregateStep::prep2PhasesAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap; AGG_MAP aggDupFuncMap; + projColsUDAFIndex = 0; // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3299,7 +3376,14 @@ void TupleAggregateStep::prep2PhasesAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colPm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } // Is this a UDAF? use the function as part of the key. 
+ mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; if (aggOp == ROWAGG_UDAF) @@ -3452,20 +3536,36 @@ void TupleAggregateStep::prep2PhasesAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3517,7 +3617,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3545,7 +3645,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -3691,6 +3791,7 
@@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector groupByPm, groupByUm, groupByNoDist; vector functionVecPm, functionNoDistVec, functionVecUm; + list multiParmIndexes; uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; @@ -3702,7 +3803,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3856,7 +3957,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -4050,7 +4151,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -4072,6 +4173,19 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + multiParmIndexes.push_back(colAggPm); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -4093,12 +4207,23 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( groupByUm.push_back(groupby); } + // Keep a count of the parms 
after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) + { SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; // UDAF support + if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) + { + // Multi-Parm is not used on the UM + ++multiParms; + continue; + } if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); @@ -4106,7 +4231,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } else @@ -4116,18 +4241,25 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fStatsFunction, funcPm->fOutputColumnIndex, funcPm->fOutputColumnIndex, - funcPm->fAuxColumnIndex)); + funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } } - posAggUm = posAggPm; - oidsAggUm = oidsAggPm; - keysAggUm = keysAggPm; - scaleAggUm = scaleAggPm; - precisionAggUm = precisionAggPm; - widthAggUm = widthAggPm; - typeAggUm = typeAggPm; + // Copy over the PM arrays to the UM. Skip any that are a multi-parm entry. 
+ for (uint32_t idx = 0; idx < oidsAggPm.size(); ++idx) + { + if (find (multiParmIndexes.begin(), multiParmIndexes.end(), idx ) != multiParmIndexes.end()) + { + continue; + } + oidsAggUm.push_back(oidsAggPm[idx]); + keysAggUm.push_back(keysAggPm[idx]); + scaleAggUm.push_back(scaleAggPm[idx]); + precisionAggUm.push_back(precisionAggPm[idx]); + widthAggUm.push_back(widthAggPm[idx]); + typeAggUm.push_back(typeAggPm[idx]); + } } @@ -4137,6 +4269,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4159,6 +4295,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colUm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } + + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -4285,7 +4436,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second; + colUm = it->second - multiParms; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); @@ -4309,7 +4460,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - 
colUm = it->second; + colUm = it->second - multiParms; if (aggOp == ROWAGG_SUM) { @@ -4412,21 +4563,36 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - pUDAFFunc = udafc->getContext().getFunction(); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4480,7 +4646,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4540,7 +4706,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called 
but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -4687,6 +4853,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -4694,6 +4865,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[i].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } if (returnedColVec[k].first != distinctColKey) continue; @@ -4715,7 +4891,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -4732,9 +4908,15 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } // search non-distinct functions in functionVec vector::iterator it = functionVecUm.begin(); @@ -4752,7 +4934,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK || @@ -4773,7 +4955,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( 
f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index fac0cd032..5c1989d51 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4038,6 +4038,10 @@ ParseTree* buildParseTree(Item_func* item, gp_walk_info& gwi, bool& nonSupport) ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { + // MCOL-1201 For UDAnF multiple parameters + vector selCols; + vector orderCols; + if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -4054,6 +4058,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // N.B. argument_count() is the # of formal parms to the agg fcn. InifniDB only supports 1 argument // TODO: Support more than one parm +#if 0 if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC && isp->sum_func() != Item_sum::UDF_SUM_FUNC) { @@ -4061,7 +4066,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG); return NULL; } - +#endif AggregateColumn* ac = NULL; if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) @@ -4084,444 +4089,509 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { gwi.fatalParseError = true; gwi.parseErrorText = "Non supported aggregate type on the select clause"; + if (ac) + delete ac; return NULL; } - // special parsing for group_concat - if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + try { - Item_func_group_concat* gc = (Item_func_group_concat*)isp; + + // special parsing for group_concat + if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + Item_func_group_concat* gc = (Item_func_group_concat*)isp; vector orderCols; - RowColumn* rowCol = new RowColumn(); + 
RowColumn* rowCol = new RowColumn(); vector selCols; - uint32_t select_ctn = gc->count_field(); - ReturnedColumn* rc = NULL; + uint32_t select_ctn = gc->count_field(); + ReturnedColumn* rc = NULL; - for (uint32_t i = 0; i < select_ctn; i++) - { - rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); - - if (!rc || gwi.fatalParseError) - return NULL; - - selCols.push_back(SRCP(rc)); - } - - ORDER** order_item, **end; - - for (order_item = gc->get_order(), - end = order_item + gc->order_field(); order_item < end; - order_item++) - { - Item* ord_col = *(*order_item)->item; - - if (ord_col->type() == Item::INT_ITEM) + for (uint32_t i = 0; i < select_ctn; i++) { - Item_int* id = (Item_int*)ord_col; - - if (id->val_int() > (int)selCols.size()) - { - gwi.fatalParseError = true; - return NULL; - } - - rc = selCols[id->val_int() - 1]->clone(); - rc->orderPos(id->val_int() - 1); - } - else - { - rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); + rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); if (!rc || gwi.fatalParseError) { + if (ac) + delete ac; return NULL; } + + selCols.push_back(SRCP(rc)); } - // 10.2 TODO: direction is now a tri-state flag - rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? 
true : false); - orderCols.push_back(SRCP(rc)); - } + ORDER** order_item, **end; - rowCol->columnVec(selCols); - (dynamic_cast(ac))->orderCols(orderCols); - parm.reset(rowCol); - - if (gc->str_separator()) - { - string separator; - separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); - (dynamic_cast(ac))->separator(separator); - } - } - else - { - for (uint32_t i = 0; i < isp->argument_count(); i++) - { - Item* sfitemp = sfitempp[i]; - Item::Type sfitype = sfitemp->type(); - - switch (sfitype) + for (order_item = gc->get_order(), + end = order_item + gc->order_field(); order_item < end; + order_item++) { - case Item::FIELD_ITEM: - { - Item_field* ifp = reinterpret_cast(sfitemp); - SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + Item* ord_col = *(*order_item)->item; - if (!sc) + if (ord_col->type() == Item::INT_ITEM) + { + Item_int* id = (Item_int*)ord_col; + + if (id->val_int() > (int)selCols.size()) { gwi.fatalParseError = true; - break; + if (ac) + delete ac; + return NULL; } - parm.reset(sc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); - TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); - gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); - break; + rc = selCols[id->val_int() - 1]->clone(); + rc->orderPos(id->val_int() - 1); } - - case Item::INT_ITEM: - case Item::STRING_ITEM: - case Item::REAL_ITEM: - case Item::DECIMAL_ITEM: + else { - // treat as count(*) - if (ac->aggOp() == AggregateColumn::COUNT) - ac->aggOp(AggregateColumn::COUNT_ASTERISK); + rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::NULL_ITEM: - { - //ac->aggOp(AggregateColumn::COUNT); - parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); - //ac->functionParms(parm); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::FUNC_ITEM: - { - Item_func* ifp = (Item_func*)sfitemp; - ReturnedColumn* rc = 0; - - // check count(1+1) case - vector tmpVec; - uint16_t parseInfo = 0; - parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); - - if (parseInfo & SUB_BIT) + if (!rc || gwi.fatalParseError) { - gwi.fatalParseError = true; - break; - } - else if (!gwi.fatalParseError && - !(parseInfo & AGG_BIT) && - !(parseInfo & AF_BIT) && - tmpVec.size() == 0) - { - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - FunctionColumn* fc = dynamic_cast(rc); - - if ((fc && fc->functionParms().empty()) || !fc) - { - //ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (dynamic_cast(rc)) - { - //@bug5229. handle constant function on aggregate argument - ac->constCol(SRCP(rc)); - break; - } - } - } - - // MySQL carelessly allows correlated aggregate function on the WHERE clause. - // Here is the work around to deal with that inconsistence. 
- // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; - ClauseType clauseType = gwi.clauseType; - - if (gwi.clauseType == WHERE) - gwi.clauseType = HAVING; - - // @bug 3603. for cases like max(rand()). try to build function first. - if (!rc) - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - - parm.reset(rc); - gwi.clauseType = clauseType; - - if (gwi.fatalParseError) - break; - - //ac->functionParms(parm); - break; - } - - case Item::REF_ITEM: - { - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (rc) - { - parm.reset(rc); - //ac->functionParms(parm); - break; + if (ac) + delete ac; + return NULL; } } - default: - { - gwi.fatalParseError = true; - //gwi.parseErrorText = "Non-supported Item in Aggregate function"; - } + // 10.2 TODO: direction is now a tri-state flag + rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); + orderCols.push_back(SRCP(rc)); } - if (gwi.fatalParseError) + rowCol->columnVec(selCols); + (dynamic_cast(ac))->orderCols(orderCols); + parm.reset(rowCol); + + if (gc->str_separator()) { - if (gwi.parseErrorText.empty()) - { - Message::Args args; - - if (item->name) - args.add(item->name); - else - args.add(""); - - gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); - } - - return NULL; + string separator; + separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); + (dynamic_cast(ac))->separator(separator); } } - } - - if (parm) - { - ac->functionParms(parm); - - if (isp->sum_func() == Item_sum::AVG_FUNC || - isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: 
- case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::DECIMAL; - ct.colWidth = 8; - ct.scale += 4; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; -#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::COUNT_FUNC || - isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = parm->resultType().scale; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_FUNC || - isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - ct.colDataType = CalpontSystemCatalog::BIGINT; - - // no break, let fall through - - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - ct.colWidth = 8; - break; - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::UBIGINT; - ct.colWidth = 8; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; 
-#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::STD_FUNC || - isp->sum_func() == Item_sum::VARIANCE_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - ct.scale = 0; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = 0; - ct.precision = -16; // borrowed to indicate skip null value check on connector - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - //Item_func_group_concat* gc = (Item_func_group_concat*)isp; - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::VARCHAR; - ct.colWidth = isp->max_length; - ct.precision = 0; - ac->resultType(ct); - } else { - ac->resultType(parm->resultType()); + for (uint32_t i = 0; i < isp->argument_count(); i++) + { + Item* sfitemp = sfitempp[i]; + Item::Type sfitype = sfitemp->type(); + + switch (sfitype) + { + case Item::FIELD_ITEM: + { + Item_field* ifp = reinterpret_cast(sfitemp); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + + if (!sc) + { + gwi.fatalParseError = true; + break; + } + + parm.reset(sc); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); + TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); + gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); + break; + } + + case Item::INT_ITEM: + case Item::STRING_ITEM: + case Item::REAL_ITEM: + case Item::DECIMAL_ITEM: + { + // treat as count(*) + if (ac->aggOp() == AggregateColumn::COUNT) + ac->aggOp(AggregateColumn::COUNT_ASTERISK); + + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::NULL_ITEM: + { + parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::FUNC_ITEM: + { + Item_func* ifp = (Item_func*)sfitemp; + ReturnedColumn* rc = 0; + + // check count(1+1) case + vector tmpVec; + uint16_t parseInfo = 0; + parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); + + if (parseInfo & SUB_BIT) + { + gwi.fatalParseError = true; + break; + } + else if (!gwi.fatalParseError && + !(parseInfo & AGG_BIT) && + !(parseInfo & AF_BIT) && + tmpVec.size() == 0) + { + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + FunctionColumn* fc = dynamic_cast(rc); + + if ((fc && fc->functionParms().empty()) || !fc) + { + //ac->aggOp(AggregateColumn::COUNT_ASTERISK); + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (dynamic_cast(rc)) + { + //@bug5229. handle constant function on aggregate argument + ac->constCol(SRCP(rc)); + break; + } + } + } + + // MySQL carelessly allows correlated aggregate function on the WHERE clause. + // Here is the work around to deal with that inconsistence. + // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; + ClauseType clauseType = gwi.clauseType; + + if (gwi.clauseType == WHERE) + gwi.clauseType = HAVING; + + // @bug 3603. for cases like max(rand()). try to build function first. 
+ if (!rc) + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + + parm.reset(rc); + gwi.clauseType = clauseType; + + if (gwi.fatalParseError) + break; + + break; + } + + case Item::REF_ITEM: + { + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (rc) + { + parm.reset(rc); + break; + } + } + + default: + { + gwi.fatalParseError = true; + //gwi.parseErrorText = "Non-supported Item in Aggregate function"; + } + } + + if (gwi.fatalParseError) + { + if (gwi.parseErrorText.empty()) + { + Message::Args args; + + if (item->name) + args.add(item->name); + else + args.add(""); + + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); + } + + if (ac) + delete ac; + return NULL; + } + if (parm) + { + // MCOL-1201 multi-argument aggregate + ac->aggParms().push_back(parm); + } + } } - } - else - { - ac->resultType(colType_MysqlToIDB(isp)); - } - // adjust decimal result type according to internalDecimalScale - if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) - { - CalpontSystemCatalog::ColType ct = ac->resultType(); - ct.scale = gwi.internalDecimalScale; - ac->resultType(ct); - } - - // check for same aggregate on the select list - ac->expressionId(ci->expressionId++); - - if (gwi.clauseType != SELECT) - { - for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + // Get result type + // Modified for MCOL-1201 multi-argument aggregate + if (ac->aggParms().size() > 0) { - if (*ac == gwi.returnedCols[i].get()) - ac->expressionId(gwi.returnedCols[i]->expressionId()); - } - } + // These are all one parm functions, so we can safely + // use the first parm for result type. + parm = ac->aggParms()[0]; + if (isp->sum_func() == Item_sum::AVG_FUNC || + isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); - // @bug5977 @note Temporary fix to avoid mysqld crash. 
The permanent fix will - // be applied in ExeMgr. When the ExeMgr fix is available, this checking - // will be taken out. - if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::DECIMAL; + ct.colWidth = 8; + ct.scale += 4; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::COUNT_FUNC || + isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = parm->resultType().scale; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_FUNC || + isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + ct.colDataType = CalpontSystemCatalog::BIGINT; + + // no break, let fall through + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + ct.colWidth = 8; + break; + + case CalpontSystemCatalog::UTINYINT: + case 
CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::UBIGINT; + ct.colWidth = 8; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::STD_FUNC || + isp->sum_func() == Item_sum::VARIANCE_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + ct.scale = 0; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = 0; + ct.precision = -16; // borrowed to indicate skip null value check on connector + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + //Item_func_group_concat* gc = (Item_func_group_concat*)isp; + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::VARCHAR; + ct.colWidth = isp->max_length; + ct.precision = 0; + ac->resultType(ct); + } + else + { + // UDAF result type will be set below. 
+ ac->resultType(parm->resultType()); + } + } + else + { + ac->resultType(colType_MysqlToIDB(isp)); + } + + // adjust decimal result type according to internalDecimalScale + if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) + { + CalpontSystemCatalog::ColType ct = ac->resultType(); + ct.scale = gwi.internalDecimalScale; + ac->resultType(ct); + } + + // check for same aggregate on the select list + ac->expressionId(ci->expressionId++); + + if (gwi.clauseType != SELECT) + { + for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + { + if (*ac == gwi.returnedCols[i].get()) + ac->expressionId(gwi.returnedCols[i]->expressionId()); + } + } + + // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will + // be applied in ExeMgr. When the ExeMgr fix is available, this checking + // will be taken out. + if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "No project column found for aggregate function"; + if (ac) + delete ac; + return NULL; + } + else if (ac->constCol()) + { + gwi.count_asterisk_list.push_back(ac); + } + + // For UDAF, populate the context and call the UDAF init() function. + // The return type is (should be) set in context by init(). + if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + { + UDAFColumn* udafc = dynamic_cast(ac); + + if (udafc) + { + mcsv1Context& context = udafc->getContext(); + context.setName(isp->func_name()); + + // Set up the return type defaults for the call to init() + context.setResultType(udafc->resultType().colDataType); + context.setColWidth(udafc->resultType().colWidth); + context.setScale(udafc->resultType().scale); + context.setPrecision(udafc->resultType().precision); + + context.setParamCount(udafc->aggParms().size()); + ColumnDatum colType; + ColumnDatum colTypes[udafc->aggParms().size()]; + // Build the column type vector. 
+ // Modified for MCOL-1201 multi-argument aggregate + for (uint32_t i = 0; i < udafc->aggParms().size(); ++i) + { + const execplan::CalpontSystemCatalog::ColType& resultType + = udafc->aggParms()[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; + } + + // Call the user supplied init() + mcsv1sdk::mcsv1_UDAF* udaf = context.getFunction(); + if (!udaf) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "Aggregate Function " + context.getName() + " doesn't exist in the ColumnStore engine"; + if (ac) + delete ac; + return NULL; + } + if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR) + { + gwi.fatalParseError = true; + gwi.parseErrorText = udafc->getContext().getErrorMessage(); + if (ac) + delete ac; + return NULL; + } + + // UDAF_OVER_REQUIRED means that this function is for Window + // Function only. Reject it here in aggregate land. + if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, + context.getName()); + if (ac) + delete ac; + return NULL; + } + + // Set the return type as set in init() + CalpontSystemCatalog::ColType ct; + ct.colDataType = context.getResultType(); + ct.colWidth = context.getColWidth(); + ct.scale = context.getScale(); + ct.precision = context.getPrecision(); + udafc->resultType(ct); + } + } + + } + catch (std::logic_error e) { gwi.fatalParseError = true; - gwi.parseErrorText = "No project column found for aggregate function"; + gwi.parseErrorText = "error building Aggregate Function: "; + gwi.parseErrorText += e.what(); + if (ac) + delete ac; return NULL; } - else if (ac->constCol()) + catch (...) 
{ - gwi.count_asterisk_list.push_back(ac); + gwi.fatalParseError = true; + gwi.parseErrorText = "error building Aggregate Function: Unspecified exception"; + if (ac) + delete ac; + return NULL; } - - // For UDAF, populate the context and call the UDAF init() function. - if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) - { - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) - { - mcsv1Context& context = udafc->getContext(); - context.setName(isp->func_name()); - - // Set up the return type defaults for the call to init() - context.setResultType(udafc->resultType().colDataType); - context.setColWidth(udafc->resultType().colWidth); - context.setScale(udafc->resultType().scale); - context.setPrecision(udafc->resultType().precision); - - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - - // Build the column type vector. For now, there is only one - colTypes.push_back(make_pair(udafc->functionParms()->alias(), udafc->functionParms()->resultType().colDataType)); - - // Call the user supplied init() - if (context.getFunction()->init(&context, colTypes) == mcsv1_UDAF::ERROR) - { - gwi.fatalParseError = true; - gwi.parseErrorText = udafc->getContext().getErrorMessage(); - return NULL; - } - - if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) - { - gwi.fatalParseError = true; - gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, - context.getName()); - return NULL; - } - - // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; - ct.colDataType = context.getResultType(); - ct.colWidth = context.getColWidth(); - ct.scale = context.getScale(); - ct.precision = context.getPrecision(); - udafc->resultType(ct); - } - } - return ac; } @@ -7843,7 +7913,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } 
std::vector::iterator funciter; @@ -9923,7 +9993,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } std::vector::iterator funciter; diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 5ca94562b..7cf476f3d 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -781,8 +781,11 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h //double double_val = *(double*)(&value); //f2->store(double_val); - if (f2->decimals() < (uint32_t)row.getScale(s)) - f2->dec = (uint32_t)row.getScale(s); + if ((f2->decimals() == DECIMAL_NOT_SPECIFIED && row.getScale(s) > 0) + || f2->decimals() < row.getScale(s)) + { + f2->dec = row.getScale(s); + } f2->store(dl); @@ -5275,8 +5278,6 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE execplan::CalpontSelectExecutionPlan::ColumnMap::iterator colMapIter; execplan::CalpontSelectExecutionPlan::ColumnMap::iterator condColMapIter; execplan::ParseTree* ptIt; - execplan::ReturnedColumn* rcIt; - for (TABLE_LIST* tl = gi.groupByTables; tl; tl = tl->next_local) { mapiter = ci->tableMap.find(tl->table); diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 4b648cb15..8d68a6260 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -340,6 +340,7 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n ac->distinct(item_sum->has_with_distinct()); Window_spec* win_spec = wf->window_spec; SRCP srcp; + CalpontSystemCatalog::ColType ct; // For return type // arguments vector funcParms; @@ -370,18 +371,25 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n context.setColWidth(rt.colWidth); context.setScale(rt.scale); context.setPrecision(rt.precision); + 
context.setParamCount(funcParms.size()); + + mcsv1sdk::ColumnDatum colType; + mcsv1sdk::ColumnDatum colTypes[funcParms.size()]; // Turn on the Analytic flag so the function is aware it is being called // as a Window Function. context.setContextFlag(CONTEXT_IS_ANALYTIC); - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - // Build the column type vector. + // Modified for MCOL-1201 multi-argument aggregate for (size_t i = 0; i < funcParms.size(); ++i) { - colTypes.push_back(make_pair(funcParms[i]->alias(), funcParms[i]->resultType().colDataType)); + const execplan::CalpontSystemCatalog::ColType& resultType + = funcParms[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; } // Call the user supplied init() @@ -401,7 +409,6 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n } // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; ct.colDataType = context.getResultType(); ct.colWidth = context.getColWidth(); ct.scale = context.getScale(); @@ -419,10 +426,10 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n { case Item_sum::UDF_SUM_FUNC: { - uint64_t bIgnoreNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)); - char sIgnoreNulls[18]; - sprintf(sIgnoreNulls, "%lu", bIgnoreNulls); - srcp.reset(new ConstantColumn(sIgnoreNulls, (uint64_t)bIgnoreNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 1 => RESPECT + uint64_t bRespectNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) ? 0 : 1; + char sRespectNulls[18]; + sprintf(sRespectNulls, "%lu", bRespectNulls); + srcp.reset(new ConstantColumn(sRespectNulls, (uint64_t)bRespectNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 
1 => RESPECT funcParms.push_back(srcp); break; } @@ -881,11 +888,13 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n return NULL; } - ac->resultType(colType_MysqlToIDB(item_sum)); - - // bug5736. Make the result type double for some window functions when - // infinidb_double_for_decimal_math is set. - ac->adjustResultType(); + if (item_sum->sum_func() != Item_sum::UDF_SUM_FUNC) + { + ac->resultType(colType_MysqlToIDB(item_sum)); + // bug5736. Make the result type double for some window functions when + // infinidb_double_for_decimal_math is set. + ac->adjustResultType(); + } ac->expressionId(ci->expressionId++); diff --git a/utils/common/any.hpp b/utils/common/any.hpp index be0ca679b..5408c5c87 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -9,123 +9,142 @@ * http://www.boost.org/LICENSE_1_0.txt */ +#include #include namespace static_any { namespace anyimpl { + struct bad_any_cast + { + }; - struct bad_any_cast - { - }; + struct empty_any + { + }; - struct empty_any - { - }; + struct base_any_policy + { + virtual void static_delete(void** x) = 0; + virtual void copy_from_value(void const* src, void** dest) = 0; + virtual void clone(void* const* src, void** dest) = 0; + virtual void move(void* const* src, void** dest) = 0; + virtual void* get_value(void** src) = 0; + virtual size_t get_size() = 0; + }; - struct base_any_policy - { - virtual void static_delete(void** x) = 0; - virtual void copy_from_value(void const* src, void** dest) = 0; - virtual void clone(void* const* src, void** dest) = 0; - virtual void move(void* const* src, void** dest) = 0; - virtual void* get_value(void** src) = 0; - virtual size_t get_size() = 0; - }; + template + struct typed_base_any_policy : base_any_policy + { + virtual size_t get_size() + { + return sizeof(T); + } + }; - template - struct typed_base_any_policy : base_any_policy - { - virtual size_t get_size() { return sizeof(T); } - }; + template + struct small_any_policy : 
typed_base_any_policy + { + virtual void static_delete(void** x) + { + } + virtual void copy_from_value(void const* src, void** dest) + { + new(dest) T(*reinterpret_cast(src)); + } + virtual void clone(void* const* src, void** dest) + { + *dest = *src; + } + virtual void move(void* const* src, void** dest) + { + *dest = *src; + } + virtual void* get_value(void** src) + { + return reinterpret_cast(src); + } + }; - template - struct small_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) { } - virtual void copy_from_value(void const* src, void** dest) - { new(dest) T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) { *dest = *src; } - virtual void move(void* const* src, void** dest) { *dest = *src; } - virtual void* get_value(void** src) { return reinterpret_cast(src); } - }; - - template - struct big_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) + template + struct big_any_policy : typed_base_any_policy + { + virtual void static_delete(void** x) { if (*x) - delete(*reinterpret_cast(x)); + delete(*reinterpret_cast(x)); *x = NULL; } - virtual void copy_from_value(void const* src, void** dest) + virtual void copy_from_value(void const* src, void** dest) { - *dest = new T(*reinterpret_cast(src)); + *dest = new T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) + virtual void clone(void* const* src, void** dest) { - *dest = new T(**reinterpret_cast(src)); + *dest = new T(**reinterpret_cast(src)); } - virtual void move(void* const* src, void** dest) + virtual void move(void* const* src, void** dest) { - (*reinterpret_cast(dest))->~T(); - **reinterpret_cast(dest) = **reinterpret_cast(src); + (*reinterpret_cast(dest))->~T(); + **reinterpret_cast(dest) = **reinterpret_cast(src); } - virtual void* get_value(void** src) { return *src; } - }; + virtual void* get_value(void** src) + { + return *src; + } + }; - template - struct choose_policy - { - typedef 
big_any_policy type; - }; + template + struct choose_policy + { + typedef big_any_policy type; + }; - template - struct choose_policy - { - typedef small_any_policy type; - }; + template + struct choose_policy + { + typedef small_any_policy type; + }; - struct any; + struct any; - /// Choosing the policy for an any type is illegal, but should never happen. - /// This is designed to throw a compiler error. - template<> - struct choose_policy - { - typedef void type; - }; + /// Choosing the policy for an any type is illegal, but should never happen. + /// This is designed to throw a compiler error. + template<> + struct choose_policy + { + typedef void type; + }; - /// Specializations for small types. - #define SMALL_POLICY(TYPE) template<> struct \ - choose_policy { typedef small_any_policy type; }; + /// Specializations for small types. +#define SMALL_POLICY(TYPE) template<> struct \ + choose_policy { typedef small_any_policy type; }; - SMALL_POLICY(char); - SMALL_POLICY(signed char); - SMALL_POLICY(unsigned char); - SMALL_POLICY(signed short); - SMALL_POLICY(unsigned short); - SMALL_POLICY(signed int); - SMALL_POLICY(unsigned int); - SMALL_POLICY(signed long); - SMALL_POLICY(unsigned long); - SMALL_POLICY(signed long long); - SMALL_POLICY(unsigned long long); - SMALL_POLICY(float); - SMALL_POLICY(double); - SMALL_POLICY(bool); + SMALL_POLICY(char); + SMALL_POLICY(signed char); + SMALL_POLICY(unsigned char); + SMALL_POLICY(signed short); + SMALL_POLICY(unsigned short); + SMALL_POLICY(signed int); + SMALL_POLICY(unsigned int); + SMALL_POLICY(signed long); + SMALL_POLICY(unsigned long); + SMALL_POLICY(signed long long); + SMALL_POLICY(unsigned long long); + SMALL_POLICY(float); + SMALL_POLICY(double); + SMALL_POLICY(bool); - #undef SMALL_POLICY +#undef SMALL_POLICY - /// This function will return a different policy for each type. 
- template - base_any_policy* get_policy() - { - static typename choose_policy::type policy; - return &policy; - }; + /// This function will return a different policy for each type. + template + base_any_policy* get_policy() + { + static typename choose_policy::type policy; + return &policy; + }; } class any @@ -139,37 +158,40 @@ public: /// Initializing constructor. template any(const T& x) - : policy(anyimpl::get_policy()), object(NULL) + : policy(anyimpl::get_policy()), object(NULL) { assign(x); } /// Empty constructor. any() - : policy(anyimpl::get_policy()), object(NULL) - { } + : policy(anyimpl::get_policy()), object(NULL) + { + } /// Special initializing constructor for string literals. any(const char* x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Copy constructor. any(const any& x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Destructor. - ~any() { + ~any() + { policy->static_delete(&object); } /// Assignment function from another any. - any& assign(const any& x) { + any& assign(const any& x) + { reset(); policy = x.policy; policy->clone(&x.object, &object); @@ -178,7 +200,8 @@ public: /// Assignment function. template - any& assign(const T& x) { + any& assign(const T& x) + { reset(); policy = anyimpl::get_policy(); policy->copy_from_value(&x, &object); @@ -197,8 +220,42 @@ public: return assign(x); } + /// Less than operator for sorting + bool operator<(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) < 0 ? 
1 : 0; + } + return 0; + } + + /// equal operator + bool operator==(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) == 0 ? 1 : 0; + } + return 0; + } + /// Utility functions - any& swap(any& x) { + uint8_t getHash() const + { + void* p1 = const_cast(object); + return *(uint64_t*)policy->get_value(&p1) % 4048; + } + any& swap(any& x) + { std::swap(policy, x.policy); std::swap(object, x.object); return *this; @@ -206,27 +263,32 @@ public: /// Cast operator. You can only cast to the original type. template - T& cast() { - if (policy != anyimpl::get_policy()) + T& cast() + { + if (policy != anyimpl::get_policy()) throw anyimpl::bad_any_cast(); T* r = reinterpret_cast(policy->get_value(&object)); return *r; } /// Returns true if the any contains no value. - bool empty() const { + bool empty() const + { return policy == anyimpl::get_policy(); } /// Frees any allocated memory, and sets the value to NULL. - void reset() { + void reset() + { policy->static_delete(&object); policy = anyimpl::get_policy(); } /// Returns true if the two types are the same. 
- bool compatible(const any& x) const { + bool compatible(const any& x) const + { return policy == x.policy; } }; + } diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 8d110cfc8..c1f5bbd63 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -215,6 +215,22 @@ inline string getStringNullValue() namespace rowgroup { +const std::string typeStr(""); +const static_any::any& RowAggregation::charTypeId((char)1); +const static_any::any& RowAggregation::scharTypeId((signed char)1); +const static_any::any& RowAggregation::shortTypeId((short)1); +const static_any::any& RowAggregation::intTypeId((int)1); +const static_any::any& RowAggregation::longTypeId((long)1); +const static_any::any& RowAggregation::llTypeId((long long)1); +const static_any::any& RowAggregation::ucharTypeId((unsigned char)1); +const static_any::any& RowAggregation::ushortTypeId((unsigned short)1); +const static_any::any& RowAggregation::uintTypeId((unsigned int)1); +const static_any::any& RowAggregation::ulongTypeId((unsigned long)1); +const static_any::any& RowAggregation::ullTypeId((unsigned long long)1); +const static_any::any& RowAggregation::floatTypeId((float)1); +const static_any::any& RowAggregation::doubleTypeId((double)1); +const static_any::any& RowAggregation::strTypeId(typeStr); + KeyStorage::KeyStorage(const RowGroup& keys, Row** tRow) : tmpRow(tRow), rg(keys) { RGData data(rg); @@ -691,7 +707,8 @@ RowAggregation::RowAggregation(const vector& rowAggGroupByCol RowAggregation::RowAggregation(const RowAggregation& rhs): fAggMapPtr(NULL), fRowGroupOut(NULL), fTotalRowCount(0), fMaxTotalRowCount(AGG_ROWGROUP_SIZE), - fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0) + fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0), + fRGContext(rhs.fRGContext) { //fGroupByCols.clear(); //fFunctionCols.clear(); @@ -756,7 +773,6 @@ void RowAggregation::addRowGroup(const RowGroup* pRows, vector& in { // this function is 
for threaded aggregation, which is for group by and distinct. // if (countSpecial(pRows)) - Row rowIn; pRows->initRow(&rowIn); @@ -790,7 +806,7 @@ void RowAggregation::setJoinRowGroups(vector* pSmallSideRG, RowGroup* } //------------------------------------------------------------------------------ -// For UDAF, we need to sometimes start a new context. +// For UDAF, we need to sometimes start a new fRGContext. // // This will be called any number of times by each of the batchprimitiveprocessor // threads on the PM and by multple threads on the UM. It must remain @@ -801,29 +817,29 @@ void RowAggregation::resetUDAF(uint64_t funcColID) // Get the UDAF class pointer and store in the row definition object. RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColID].get()); - // resetUDAF needs to be re-entrant. Since we're modifying the context object - // by creating a new userData, we need a local copy. The copy constructor - // doesn't copy userData. - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + // RowAggregation and it's functions need to be re-entrant which means + // each instance (thread) needs its own copy of the context object. + // Note: operator=() doesn't copy userData. + fRGContext = rowUDAF->fUDAFContext; // Call the user reset for the group userData. Since, at this point, // context's userData will be NULL, reset will generate a new one. 
mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->reset(&rgContext); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } fRow.setUserDataStore(fRowGroupOut->getRGData()->getUserDataStore()); - fRow.setUserData(rgContext, - rgContext.getUserDataSP(), - rgContext.getUserDataSize(), + fRow.setUserData(fRGContext, + fRGContext.getUserDataSP(), + fRGContext.getUserDataSize(), rowUDAF->fAuxColumnIndex); - rgContext.setUserData(NULL); // Prevents calling deleteUserData on the context. + fRGContext.setUserData(NULL); // Prevents calling deleteUserData on the fRGContext. } //------------------------------------------------------------------------------ @@ -873,7 +889,6 @@ void RowAggregation::initialize() } } - // Save the RowGroup data pointer fResultDataVec.push_back(fRowGroupOut->getRGData()); @@ -1658,10 +1673,11 @@ void RowAggregation::updateEntry(const Row& rowIn) { for (uint64_t i = 0; i < fFunctionCols.size(); i++) { - int64_t colIn = fFunctionCols[i]->fInputColumnIndex; - int64_t colOut = fFunctionCols[i]->fOutputColumnIndex; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[i]; + int64_t colIn = pFunctionCol->fInputColumnIndex; + int64_t colOut = pFunctionCol->fOutputColumnIndex; - switch (fFunctionCols[i]->fAggFunction) + switch (pFunctionCol->fAggFunction) { case ROWAGG_COUNT_COL_NAME: @@ -1675,7 +1691,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_MIN: case ROWAGG_MAX: case ROWAGG_SUM: - doMinMaxSum(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction); + doMinMaxSum(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; case ROWAGG_AVG: @@ -1692,7 +1708,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_BIT_OR: case ROWAGG_BIT_XOR: { - doBitOp(rowIn, 
colIn, colOut, fFunctionCols[i]->fAggFunction); + doBitOp(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; } @@ -1707,11 +1723,11 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); + RowUDAFFunctionCol* rowUDAF = dynamic_cast(pFunctionCol.get()); if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF); + doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF, i); } else { @@ -1725,7 +1741,7 @@ void RowAggregation::updateEntry(const Row& rowIn) { std::ostringstream errmsg; errmsg << "RowAggregation: function (id = " << - (uint64_t) fFunctionCols[i]->fAggFunction << ") is not supported."; + (uint64_t) pFunctionCol->fAggFunction << ") is not supported."; cerr << errmsg.str() << endl; throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); break; @@ -1997,131 +2013,142 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu } void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { - std::vector valsIn; - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupIn.getColTypes()[colIn]; - std::vector dataFlags; + int32_t paramCount = fRGContext.getParameterCount(); + // The vector of parameters to be sent to the UDAF + mcsv1sdk::ColumnDatum valsIn[paramCount]; + uint32_t dataFlags[paramCount]; - // Get the context for this rowGroup. Make a copy so we're thread safe. 
- mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); - - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - - if (isNull(&fRowGroupIn, rowIn, colIn) == true) + execplan::CalpontSystemCatalog::ColDataType colDataType; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + if (isNull(&fRowGroupIn, rowIn, colIn) == true) { - return; + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + { + return; + } + dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; + } + + colDataType = fRowGroupIn.getColTypes()[colIn]; + if (!fRGContext.isParamNull(i)) + { + switch (colDataType) + { + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + break; + } + + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UBIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + { + datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; + datum.columnData = rowIn.getDoubleField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + { + 
datum.dataType = execplan::CalpontSystemCatalog::FLOAT; + datum.columnData = rowIn.getFloatField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::TIME: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + { + datum.dataType = colDataType; + datum.columnData = rowIn.getStringField(colIn); + break; + } + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation " << fRGContext.getName() << + ": No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } } - flag |= mcsv1sdk::PARAM_IS_NULL; - } - - flags.push_back(flag); - rgContext.setDataFlags(&flags); - - mcsv1sdk::ColumnDatum datum; - - if (!rgContext.isParamNull(0)) - { - switch (colDataType) + // MCOL-1201: If there are multiple parameters, the next fFunctionCols + // will have the column used. By incrementing the funcColsIdx (passed by + // ref, we also increment the caller's index. 
+ if (fFunctionCols.size() > funcColsIdx + 1 + && fFunctionCols[funcColsIdx+1]->fAggFunction == ROWAGG_MULTI_PARM) { - case execplan::CalpontSystemCatalog::TINYINT: - case execplan::CalpontSystemCatalog::SMALLINT: - case execplan::CalpontSystemCatalog::MEDINT: - case execplan::CalpontSystemCatalog::INT: - case execplan::CalpontSystemCatalog::BIGINT: - case execplan::CalpontSystemCatalog::DECIMAL: - case execplan::CalpontSystemCatalog::UDECIMAL: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - datum.scale = fRowGroupIn.getScale()[colIn]; - datum.precision = fRowGroupIn.getPrecision()[colIn]; - break; - } - - case execplan::CalpontSystemCatalog::UTINYINT: - case execplan::CalpontSystemCatalog::USMALLINT: - case execplan::CalpontSystemCatalog::UMEDINT: - case execplan::CalpontSystemCatalog::UINT: - case execplan::CalpontSystemCatalog::UBIGINT: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DOUBLE: - case execplan::CalpontSystemCatalog::UDOUBLE: - { - datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; - datum.columnData = rowIn.getDoubleField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::FLOAT: - case execplan::CalpontSystemCatalog::UFLOAT: - { - datum.dataType = execplan::CalpontSystemCatalog::FLOAT; - datum.columnData = rowIn.getFloatField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DATE: - case execplan::CalpontSystemCatalog::DATETIME: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::TIME: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::CHAR: - case execplan::CalpontSystemCatalog::VARCHAR: - case 
execplan::CalpontSystemCatalog::TEXT: - case execplan::CalpontSystemCatalog::VARBINARY: - case execplan::CalpontSystemCatalog::CLOB: - case execplan::CalpontSystemCatalog::BLOB: - { - datum.dataType = colDataType; - datum.columnData = rowIn.getStringField(colIn); - break; - } - - default: - { - std::ostringstream errmsg; - errmsg << "RowAggregation " << rgContext.getName() << - ": No logic for data type: " << colDataType; - throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); - break; - } + ++funcColsIdx; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; + colIn = pFunctionCol->fInputColumnIndex; + colOut = pFunctionCol->fOutputColumnIndex; + } + else + { + break; } } - valsIn.push_back(datum); - // The intermediate values are stored in userData referenced by colAux. - rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setDataFlags(dataFlags); + fRGContext.setUserData(fRow.getUserData(colAux)); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->nextValue(&rgContext, valsIn); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -2218,6 +2245,7 @@ RowAggregationUM::RowAggregationUM(const RowAggregationUM& rhs) : fHasAvg(rhs.fHasAvg), fKeyOnHeap(rhs.fKeyOnHeap), fHasStatsFunc(rhs.fHasStatsFunc), + fHasUDAF(rhs.fHasUDAF), fExpression(rhs.fExpression), fTotalMemUsage(rhs.fTotalMemUsage), fRm(rhs.fRm), @@ -2419,7 +2447,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -2585,22 +2613,6 @@ void RowAggregationUM::calculateAvgColumns() // Sets the value 
from valOut into column colOut, performing any conversions. void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) { - static const static_any::any& charTypeId((char)1); - static const static_any::any& scharTypeId((signed char)1); - static const static_any::any& shortTypeId((short)1); - static const static_any::any& intTypeId((int)1); - static const static_any::any& longTypeId((long)1); - static const static_any::any& llTypeId((long long)1); - static const static_any::any& ucharTypeId((unsigned char)1); - static const static_any::any& ushortTypeId((unsigned short)1); - static const static_any::any& uintTypeId((unsigned int)1); - static const static_any::any& ulongTypeId((unsigned long)1); - static const static_any::any& ullTypeId((unsigned long long)1); - static const static_any::any& floatTypeId((float)1); - static const static_any::any& doubleTypeId((double)1); - static const std::string typeStr(""); - static const static_any::any& strTypeId(typeStr); - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; if (valOut.empty()) @@ -2609,6 +2621,179 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) return; } + int64_t intOut = 0; + uint64_t uintOut = 0; + float floatOut = 0.0; + double doubleOut = 0.0; + ostringstream oss; + std::string strOut; + + bool bSetSuccess = false; + switch (colDataType) + { + case execplan::CalpontSystemCatalog::BIT: + case execplan::CalpontSystemCatalog::TINYINT: + if (valOut.compatible(charTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(scharTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<1>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + if (valOut.compatible(shortTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<2>(intOut, colOut); + bSetSuccess = true; 
+ } + break; + + case execplan::CalpontSystemCatalog::INT: + if (valOut.compatible(uintTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(longTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<4>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + if (valOut.compatible(llTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<8>(intOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UTINYINT: + if (valOut.compatible(ucharTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<1>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + if (valOut.compatible(ushortTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<2>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UINT: + if (valOut.compatible(uintTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<4>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UBIGINT: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + if (valOut.compatible(floatTypeId)) + { + floatOut = valOut.cast(); + fRow.setFloatField(floatOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DOUBLE: + case 
execplan::CalpontSystemCatalog::UDOUBLE: + if (valOut.compatible(doubleTypeId)) + { + doubleOut = valOut.cast(); + fRow.setDoubleField(doubleOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setStringField(strOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setVarBinaryField(strOut, colOut); + bSetSuccess = true; + } + break; + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation: No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } + if (!bSetSuccess) + { + SetUDAFAnyValue(valOut, colOut); + } +} + +void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut) +{ + execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; + // This may seem a bit convoluted. Users shouldn't return a type // that they didn't set in mcsv1_UDAF::init(), but this // handles whatever return type is given and casts @@ -2814,7 +2999,7 @@ void RowAggregationUM::calculateUDAFColumns() continue; rowUDAF = dynamic_cast(fFunctionCols[i].get()); - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + fRGContext = rowUDAF->fUDAFContext; int64_t colOut = rowUDAF->fOutputColumnIndex; int64_t colAux = rowUDAF->fAuxColumnIndex; @@ -2826,26 +3011,26 @@ void RowAggregationUM::calculateUDAFColumns() fRowGroupOut->getRow(j, &fRow); // Turn the NULL flag off. We can't know NULL at this point - rgContext.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF evaluate function mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->evaluate(&rgContext, valOut); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); } - rgContext.setUserData(NULL); + fRGContext.setUserData(NULL); } } @@ -3116,54 +3301,60 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u { // For a NULL constant, call nextValue with NULL and then evaluate. bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. 
- rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } - +#if 0 + uint32_t dataFlags[fRGContext.getParameterCount()]; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + { + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + } +#endif // Turn the NULL and CONSTANT flags on. - std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a dummy datum - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = execplan::CalpontSystemCatalog::BIGINT; datum.columnData = 0; - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), 
logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3460,30 +3651,28 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData case ROWAGG_UDAF: { bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. - rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Turn the CONSTANT flags on. 
- std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a datum item for sending to UDAF - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = (CalpontSystemCatalog::ColDataType)colDataType; switch (colDataType) @@ -3567,27 +3756,27 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData break; } - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3806,7 +3995,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -4011,45 +4200,43 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance 
//------------------------------------------------------------------------------ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { static_any::any valOut; - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); // Get the user data boost::shared_ptr userData = rowIn.getUserData(colIn + 1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. - std::vector flags; - uint32_t flag = 0; + uint32_t flags[1]; + flags[0] = 0; if (!userData) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { return; } // Turn on NULL flags - flag |= mcsv1sdk::PARAM_IS_NULL; + flags[0] |= mcsv1sdk::PARAM_IS_NULL; } - flags.push_back(flag); - rgContext.setDataFlags(&flags); + fRGContext.setDataFlags(flags); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF subEvaluate method mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->subEvaluate(&rgContext, userData.get()); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userData.get()); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::IDBExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -4246,7 +4433,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index b6294f193..282f354fc 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -110,6 +110,9 @@ enum RowAggFunctionType // User Defined Aggregate Function ROWAGG_UDAF, + // If an Aggregate has more than one parameter, this will be used for parameters after the first + ROWAGG_MULTI_PARM, + // internal function type to avoid duplicate the work // handling ROWAGG_COUNT_NO_OP, ROWAGG_DUP_FUNCT and ROWAGG_DUP_AVG is a little different // ROWAGG_COUNT_NO_OP : count done by AVG, no need to copy @@ -583,7 +586,7 @@ protected: virtual void doAvg(const Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); - virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); @@ -660,6 +663,25 @@ 
protected: //need access to rowgroup storage holding the rows to hash & ==. friend class AggHasher; friend class AggComparator; + + // We need a separate copy for each thread. + mcsv1sdk::mcsv1Context fRGContext; + + // These are handy for testing the actual type of static_any for UDAF + static const static_any::any& charTypeId; + static const static_any::any& scharTypeId; + static const static_any::any& shortTypeId; + static const static_any::any& intTypeId; + static const static_any::any& longTypeId; + static const static_any::any& llTypeId; + static const static_any::any& ucharTypeId; + static const static_any::any& ushortTypeId; + static const static_any::any& uintTypeId; + static const static_any::any& ulongTypeId; + static const static_any::any& ullTypeId; + static const static_any::any& floatTypeId; + static const static_any::any& doubleTypeId; + static const static_any::any& strTypeId; }; //------------------------------------------------------------------------------ @@ -783,6 +805,9 @@ protected: // Sets the value from valOut into column colOut, performing any conversions. void SetUDAFValue(static_any::any& valOut, int64_t colOut); + // If the datatype returned by evaluate isn't what we expect, convert. + void SetUDAFAnyValue(static_any::any& valOut, int64_t colOut); + // calculate the UDAF function all rows received. UM only function. 
void calculateUDAFColumns(); @@ -877,7 +902,7 @@ protected: void doStatistics(const Row&, int64_t, int64_t, int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); - void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); bool countSpecial(const RowGroup* pRG) { return false; diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index e69ff4d88..01009e35a 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/udfsdk/allnull.cpp b/utils/udfsdk/allnull.cpp index b6b8d79da..247b9e28f 100644 --- a/utils/udfsdk/allnull.cpp +++ b/utils/udfsdk/allnull.cpp @@ -27,11 +27,11 @@ struct allnull_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode allnull::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { context->setUserDataSize(sizeof(allnull_data)); - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -52,8 +52,7 @@ mcsv1_UDAF::ReturnCode allnull::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { struct allnull_data* data = (struct allnull_data*)context->getUserData()->data; diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h index 86697b052..da17f5d6b 100644 
--- a/utils/udfsdk/allnull.h +++ b/utils/udfsdk/allnull.h @@ -103,7 +103,7 @@ public: * colTypes or wrong number of arguments. Else return * mcsv1_UDAF::SUCCESS. */ - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); /** * reset() @@ -138,7 +138,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() diff --git a/utils/udfsdk/avg_mode.cpp b/utils/udfsdk/avg_mode.cpp index f39b5e402..5429183d9 100644 --- a/utils/udfsdk/avg_mode.cpp +++ b/utils/udfsdk/avg_mode.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("avg_mode() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode avg_mode::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; @@ -187,8 +186,7 @@ mcsv1_UDAF::ReturnCode 
avg_mode::evaluate(mcsv1Context* context, static_any::any return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/avg_mode.h b/utils/udfsdk/avg_mode.h index 4f3442005..5722c5fea 100644 --- a/utils/udfsdk/avg_mode.h +++ b/utils/udfsdk/avg_mode.h @@ -18,7 +18,7 @@ /*********************************************************************** * $Id$ * -* mcsv1_UDAF.h +* avg_mode.h ***********************************************************************/ /** @@ -50,8 +50,8 @@ * is also used to describe the interface that is used for * either. */ -#ifndef HEADER_mode -#define HEADER_mode +#ifndef HEADER_avg_mode +#define HEADER_avg_mode #include #include @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index 349a642ec..ee08dcc07 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -36,6 +36,8 @@ UDAF_MAP UDAFMap::fm; #include "ssq.h" #include "median.h" #include "avg_mode.h" +#include "regr_avgx.h" +#include "avgx.h" UDAF_MAP& UDAFMap::getMap() { if (fm.size() > 0) @@ -52,6 +54,8 @@ UDAF_MAP& UDAFMap::getMap() fm["ssq"] = new ssq(); fm["median"] = new median(); fm["avg_mode"] = new avg_mode(); + fm["regr_avgx"] = new regr_avgx(); + fm["avgx"] = new avgx(); return fm; } @@ -115,8 +119,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const { // We don't test the per row data fields. They don't determine // if it's the same Context. - if (getName() != c.getName() - || fRunFlags != c.fRunFlags + if (getName() != c.getName() + ||fRunFlags != c.fRunFlags || fContextFlags != c.fContextFlags || fUserDataSize != c.fUserDataSize || fResultType != c.fResultType @@ -125,7 +129,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const || fStartFrame != c.fStartFrame || fEndFrame != c.fEndFrame || fStartConstant != c.fStartConstant - || fEndConstant != c.fEndConstant) + || fEndConstant != c.fEndConstant + || fParamCount != c.fParamCount) return false; return true; @@ -217,6 +222,7 @@ void mcsv1Context::serialize(messageqcpp::ByteStream& b) const b << (uint32_t)fEndFrame; b << fStartConstant; b << fEndConstant; + b << fParamCount; } void mcsv1Context::unserialize(messageqcpp::ByteStream& b) @@ -238,6 +244,7 @@ void mcsv1Context::unserialize(messageqcpp::ByteStream& b) fEndFrame = (WF_FRAME)frame; b >> fStartConstant; b >> fEndConstant; + b >> fParamCount; } void UserData::serialize(messageqcpp::ByteStream& bs) const diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h 
index d24852c28..df3f47649 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -77,6 +77,7 @@ #include "any.hpp" #include "calpontsystemcatalog.h" #include "wf_frame.h" +#include "my_decimal_limits.h" using namespace execplan; @@ -200,12 +201,8 @@ static uint64_t CONTEXT_IS_PM __attribute__ ((unused)) = 1 << 2; // Flags that describe the contents of a specific input parameter // These will be set in context->dataFlags for each method call by the framework. // User code shouldn't use these directly -static uint64_t PARAM_IS_NULL __attribute__ ((unused)) = 1; -static uint64_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; - -// shorthand for the list of columns in the call sent to init() -// first is the actual column name and second is the data type in Columnstore. -typedef std::vector >COL_TYPES; +static uint32_t PARAM_IS_NULL __attribute__ ((unused)) = 1; +static uint32_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; // This is the context class that is passed to all API callbacks // The framework potentially sets data here for each invocation of @@ -269,7 +266,9 @@ public: EXPORT bool isPM(); // Parameter refinement description accessors - // valid in nextValue and dropValue + + // How many actual parameters were entered. + // valid in all calls size_t getParameterCount() const; // Determine if an input parameter is NULL @@ -298,6 +297,7 @@ public: // This only makes sense if the return type is decimal, but should be set // to (0, -1) for other types if the inout is decimal. // valid in init() + // Set the scale to DECIMAL_NOT_SPECIFIED if you want a floating decimal. 
EXPORT bool setScale(int32_t scale); EXPORT bool setPrecision(int32_t precision); @@ -372,7 +372,7 @@ private: int32_t fResultscale; // For scale, the number of digits to the right of the decimal int32_t fResultPrecision; // The max number of digits allowed in the decimal value std::string errorMsg; - std::vector* dataFlags; // one entry for each parameter + uint32_t* dataFlags; // an integer array wirh one entry for each parameter bool* bInterrupted; // Gets set to true by the Framework if something happens WF_FRAME fStartFrame; // Is set to default to start, then modified by the actual frame in the call WF_FRAME fEndFrame; // Is set to default to start, then modified by the actual frame in the call @@ -380,6 +380,7 @@ private: int32_t fEndConstant; // for end frame WF_PRECEEDIMG or WF_FOLLOWING std::string functionName; mcsv1sdk::mcsv1_UDAF* func; + int32_t fParamCount; public: // For use by the framework @@ -394,13 +395,14 @@ public: EXPORT void clearContextFlag(uint64_t flag); EXPORT uint64_t getContextFlags() const; EXPORT uint32_t getUserDataSize() const; - EXPORT std::vector& getDataFlags(); - EXPORT void setDataFlags(std::vector* flags); + EXPORT uint32_t* getDataFlags(); + EXPORT void setDataFlags(uint32_t* flags); EXPORT void setInterrupted(bool interrupted); EXPORT void setInterrupted(bool* interrupted); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction(); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const; EXPORT boost::shared_ptr getUserDataSP(); + EXPORT void setParamCount(int32_t paramCount); }; // Since aggregate functions can operate on any data type, we use the following structure @@ -419,9 +421,10 @@ public: struct ColumnDatum { CalpontSystemCatalog::ColDataType dataType; // defined in calpontsystemcatalog.h - static_any::any columnData; + static_any::any columnData; // Not valid in init() uint32_t scale; // If dataType is a DECIMAL type uint32_t precision; // If dataType is a DECIMAL type + std::string alias; // Only filled in for init() ColumnDatum() : 
dataType(CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1) {}; }; @@ -466,7 +469,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes) = 0; + ColumnDatum* colTypes) = 0; /** * reset() @@ -501,8 +504,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn) = 0; + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn) = 0; /** * subEvaluate() @@ -579,8 +581,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() @@ -640,32 +641,32 @@ inline mcsv1Context::mcsv1Context() : fEndFrame(WF_CURRENT_ROW), fStartConstant(0), fEndConstant(0), - func(NULL) + func(NULL), + fParamCount(0) { } inline mcsv1Context::mcsv1Context(const mcsv1Context& rhs) : - fContextFlags(0), - fColWidth(0), - dataFlags(NULL), - bInterrupted(NULL), - func(NULL) + dataFlags(NULL) { copy(rhs); } inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs) { - fRunFlags = rhs.getRunFlags(); - fResultType = rhs.getResultType(); - fUserDataSize = rhs.getUserDataSize(); - fResultscale = rhs.getScale(); - fResultPrecision = rhs.getPrecision(); + fRunFlags = rhs.fRunFlags; + fContextFlags = rhs.fContextFlags; + fResultType = rhs.fResultType; + fUserDataSize = rhs.fUserDataSize; + fColWidth = rhs.fColWidth; + fResultscale = rhs.fResultscale; + fResultPrecision = rhs.fResultPrecision; rhs.getStartFrame(fStartFrame, fStartConstant); rhs.getEndFrame(fEndFrame, fEndConstant); - functionName = rhs.getName(); - bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference - func = rhs.func; + functionName = rhs.functionName; + bInterrupted = rhs.bInterrupted; // Multiple 
threads will use the same reference + func = rhs.func; + fParamCount = rhs.fParamCount; return *this; } @@ -675,11 +676,7 @@ inline mcsv1Context::~mcsv1Context() inline mcsv1Context& mcsv1Context::operator=(const mcsv1Context& rhs) { - fContextFlags = 0; - fColWidth = 0; dataFlags = NULL; - bInterrupted = NULL; - func = NULL; return copy(rhs); } @@ -753,16 +750,13 @@ inline bool mcsv1Context::isPM() inline size_t mcsv1Context::getParameterCount() const { - if (dataFlags) - return dataFlags->size(); - - return 0; + return fParamCount; } inline bool mcsv1Context::isParamNull(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_NULL; + return dataFlags[paramIdx] & PARAM_IS_NULL; return false; } @@ -770,7 +764,7 @@ inline bool mcsv1Context::isParamNull(int paramIdx) inline bool mcsv1Context::isParamConstant(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_CONSTANT; + return dataFlags[paramIdx] & PARAM_IS_CONSTANT; return false; } @@ -939,18 +933,22 @@ inline uint32_t mcsv1Context::getUserDataSize() const return fUserDataSize; } -inline std::vector& mcsv1Context::getDataFlags() +inline uint32_t* mcsv1Context::getDataFlags() { - return *dataFlags; + return dataFlags; } -inline void mcsv1Context::setDataFlags(std::vector* flags) +inline void mcsv1Context::setDataFlags(uint32_t* flags) { dataFlags = flags; } -inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, - std::vector& valsDropped) +inline void mcsv1Context::setParamCount(int32_t paramCount) +{ + fParamCount = paramCount; +} + +inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { return NOT_IMPLEMENTED; } diff --git a/utils/udfsdk/median.cpp b/utils/udfsdk/median.cpp index e32d721f1..9c7e72dc3 100644 --- a/utils/udfsdk/median.cpp +++ b/utils/udfsdk/median.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* 
colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("median() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode median::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; @@ -212,8 +211,7 @@ mcsv1_UDAF::ReturnCode median::evaluate(mcsv1Context* context, static_any::any& return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h index d64792461..142be6ba8 100644 --- a/utils/udfsdk/median.h +++ b/utils/udfsdk/median.h @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. 
*/ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/ssq.cpp b/utils/udfsdk/ssq.cpp index 4d9ef7e10..20fdc33db 100644 --- a/utils/udfsdk/ssq.cpp +++ b/utils/udfsdk/ssq.cpp @@ -34,9 +34,9 @@ struct ssq_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -44,13 +44,13 @@ mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("ssq() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -81,8 +81,7 @@ mcsv1_UDAF::ReturnCode ssq::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; @@ -183,8 +182,7 @@ mcsv1_UDAF::ReturnCode ssq::evaluate(mcsv1Context* context, static_any::any& val return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode 
ssq::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h index 514c7a3f0..2cac61c2c 100644 --- a/utils/udfsdk/ssq.h +++ b/utils/udfsdk/ssq.h @@ -114,7 +114,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -147,8 +147,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -224,8 +223,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); protected: }; diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp index 981651c43..dc0277ccc 100644 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -490,6 +490,168 @@ extern "C" // return data->sumsq; return 0; } + +//======================================================================= + + /** + * regr_avgx connector stub + */ + struct regr_avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_avgx_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_avgx() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + 
data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x and y + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } + +//======================================================================= + + /** + * avgx connector stub. Exactly the same functionality as the + * built in avg() function. 
Use to test the performance of the + * API + */ + struct avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct avgx_data* data; + if (args->arg_count != 1) + { + strcpy(message,"avgx() requires one argument"); + return 1; + } + + if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x and y + struct avgx_data* data = (struct avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } } // vim:ts=4 sw=4: diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index 664b0e7de..fe1f3fd0e 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -204,8 +204,10 @@ Filters="*.c;*.C;*.cc;*.cpp;*.cp;*.cxx;*.c++;*.prg;*.pas;*.dpr;*.asm;*.s;*.bas;*.java;*.cs;*.sc;*.e;*.cob;*.html;*.rc;*.tcl;*.py;*.pl;*.d"> + + @@ -215,8 +217,10 @@ 
Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + + diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index f302c49cd..5cd5243c5 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -52,6 +52,7 @@ using namespace joblist; namespace windowfunction { + template boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) { @@ -142,7 +143,7 @@ template void WF_udaf::resetData() { getContext().getFunction()->reset(&getContext()); - fSet.clear(); + fDistinctSet.clear(); WindowFunctionType::resetData(); } @@ -150,8 +151,8 @@ template void WF_udaf::parseParms(const std::vector& parms) { bRespectNulls = true; - // parms[1]: respect null | ignore null - ConstantColumn* cc = dynamic_cast(parms[1].get()); + // The last parms: respect null | ignore null + ConstantColumn* cc = dynamic_cast(parms[parms.size()-1].get()); idbassert(cc != NULL); bool isNull = false; // dummy, harded coded bRespectNulls = (cc->getIntVal(fRow, isNull) > 0); @@ -167,52 +168,71 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) } mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - uint64_t colOut = fFieldIndex[0]; - uint64_t colIn = fFieldIndex[1]; - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. 
+ getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); + + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } for (int64_t i = b; i < e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; + bool bHasNull = false; fRow.setData(getPointer(fRowData->at(i))); // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; + uint32_t flags[getContext().getParameterCount()]; - if (fRow.isNullValue(colIn) == true) + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) { - continue; + if (!bRespectNulls) + { + bHasNull = true; + break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - flag |= mcsv1sdk::PARAM_IS_NULL; + T valIn; + getValue(colIn, valIn, &datum.dataType); + + // Check for distinct, if turned on. + // Currently, distinct only works for param 1 + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. - // TODO: when we impliment distinct, we need to revist this. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) + if (bHasNull) { continue; } - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - rc = getContext().getFunction()->dropValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED) @@ -442,59 +462,191 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) else if (fPrev <= e && fPrev > c) e = c; - uint64_t colIn = fFieldIndex[1]; + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. + getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } if (b <= c && c <= e) getContext().setContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); else getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); - + bool bHasNull = false; for (int64_t i = b; i <= e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; fRow.setData(getPointer(fRowData->at(i))); - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - if (fRow.isNullValue(colIn) == true) + // NULL flags + uint32_t flags[getContext().getParameterCount()]; + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + + // Turn on Null flags or skip based on respect nulls + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) + { + if (!bRespectNulls) + { + bHasNull = true; + 
break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; + } + + // MCOL-1201 Multi-Paramter calls + switch (datum.dataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + { + int64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::UDECIMAL: + { + uint64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + double valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + float valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[i] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } + } + // Skip if any value is NULL and respect nulls is off. + if (bHasNull) { continue; } - - flag |= mcsv1sdk::PARAM_IS_NULL; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) - { - continue; - } - - if (fDistinct) - fSet.insert(valIn); - - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - + getContext().setDataFlags(flags); + rc = getContext().getFunction()->nextValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index babb32565..f7a4c4b08 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -21,13 +21,35 @@ #ifndef UTILS_WF_UDAF_H #define UTILS_WF_UDAF_H -#include +#ifndef _MSC_VER +#include +#else +#include +#endif #include "windowfunctiontype.h" #include "mcsv1_udaf.h" namespace windowfunction { +// Hash classes for the distinct hashmap +class DistinctHasher +{ +public: + inline size_t operator()(const static_any::any& a) const + { + return a.getHash(); + } +}; + +class DistinctEqual +{ +public: + inline bool operator()(const static_any::any& lhs, static_any::any& rhs) const + { + return lhs == rhs; + } +}; // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF @@ -72,7 +94,8 @@ protected: bool fDistinct; bool bRespectNulls; // respect null | ignore null bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue. 
- std::set fSet; // To hold distinct values + // To hold distinct values + std::tr1::unordered_set fDistinctSet; static_any::any fValOut; // The return value public: diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index 950045899..4c5b4de32 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -492,10 +492,10 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) static uint64_t dateNull = joblist::DATENULL; static uint64_t datetimeNull = joblist::DATETIMENULL; static uint64_t timeNull = joblist::TIMENULL; - static uint64_t char1Null = joblist::CHAR1NULL; - static uint64_t char2Null = joblist::CHAR2NULL; - static uint64_t char4Null = joblist::CHAR4NULL; - static uint64_t char8Null = joblist::CHAR8NULL; +// static uint64_t char1Null = joblist::CHAR1NULL; +// static uint64_t char2Null = joblist::CHAR2NULL; +// static uint64_t char4Null = joblist::CHAR4NULL; +// static uint64_t char8Null = joblist::CHAR8NULL; static string stringNull(""); void* v = NULL; diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 5d3dfec85..41c788693 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1280,7 +1280,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -2024,10 +2024,10 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 0; k < colStructList.size(); k++) + for (size_t k = 0; k < colStructList.size(); k++) { // Skip the selected column 
- if (k == colId) + if (k == (size_t)colId) continue; Column expandCol; @@ -2582,7 +2582,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -3277,7 +3277,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; From 956db53dfd270e8d6b701f8c32de11476adc4ca6 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:53:49 -0500 Subject: [PATCH 011/123] MCOL-1201 Add test UDAF back in after rebase --- utils/udfsdk/avgx.cpp | 257 +++++++++++++++++++++++++++++++++++ utils/udfsdk/avgx.h | 99 ++++++++++++++ utils/udfsdk/regr_avgx.cpp | 270 +++++++++++++++++++++++++++++++++++++ utils/udfsdk/regr_avgx.h | 99 ++++++++++++++ 4 files changed, 725 insertions(+) create mode 100644 utils/udfsdk/avgx.cpp create mode 100644 utils/udfsdk/avgx.h create mode 100644 utils/udfsdk/regr_avgx.cpp create mode 100644 utils/udfsdk/regr_avgx.h diff --git a/utils/udfsdk/avgx.cpp b/utils/udfsdk/avgx.cpp new file mode 100644 index 000000000..887a8418e --- /dev/null +++ b/utils/udfsdk/avgx.cpp @@ -0,0 +1,257 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include +#include "avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 1) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with other than 1 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[0].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[0].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode avgx::reset(mcsv1Context* context) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_x = valsIn[0].columnData; + struct avgx_data* data = (struct 
avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct avgx_data* outData = (struct avgx_data*)context->getUserData()->data; + struct avgx_data* inData = (struct avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + + valOut = data->sum / (double)data->cnt; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_x = valsDropped[0].columnData; + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/avgx.h b/utils/udfsdk/avgx.h new file mode 100644 index 000000000..0569b6091 --- /dev/null +++ b/utils/udfsdk/avgx.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* avgx.h +***********************************************************************/ + +/** + * Columnstore interface for for the avgx function + * + * + * CREATE AGGREGATE FUNCTION avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_avgx +#define HEADER_avgx + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+ +// Return the avgx value of the dataset + +class avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + avgx() : mcsv1_UDAF() {}; + virtual ~avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_.h + diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/udfsdk/regr_avgx.cpp new file mode 100644 index 000000000..c7cc5b56e --- /dev/null +++ b/utils/udfsdk/regr_avgx.cpp @@ -0,0 +1,270 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include +#include +#include +#include "regr_avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct regr_avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode regr_avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 2) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with other than 2 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(regr_avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[1].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode regr_avgx::reset(mcsv1Context* context) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_y = valsIn[0].columnData; + static_any::any& valIn_x = valsIn[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (context->isParamNull(0) || context->isParamNull(1)) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct regr_avgx_data* outData = (struct regr_avgx_data*)context->getUserData()->data; + struct regr_avgx_data* inData = (struct regr_avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + + if (data->cnt == 0) + { + valOut = 0; + } + else + { + valOut = data->sum / (double)data->cnt; + } + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_y = valsDropped[0].columnData; + static_any::any& valIn_x = valsDropped[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/regr_avgx.h b/utils/udfsdk/regr_avgx.h new file mode 100644 index 000000000..f70f30d8c --- /dev/null +++ b/utils/udfsdk/regr_avgx.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* regr_avgx.h +***********************************************************************/ + +/** + * Columnstore interface for for the regr_avgx function + * + * + * CREATE AGGREGATE FUNCTION regr_avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_regr_avgx +#define HEADER_regr_avgx + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+ +// Return the regr_avgx value of the dataset + +class regr_avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + regr_avgx() : mcsv1_UDAF() {}; + virtual ~regr_avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_regr_avgx.h + From 40aca95a7ac83e00e7e12187d94910a2fba32fd8 Mon Sep 17 00:00:00 2001 From: David Hall Date: Mon, 14 May 2018 17:28:24 -0500 Subject: [PATCH 012/123] MCOL-1201 some fixes from testing --- dbcon/joblist/tupleaggregatestep.cpp | 229 ++++++++++++--------------- dbcon/mysql/ha_calpont_execplan.cpp | 1 - utils/common/common.vpj | 2 + utils/rowgroup/rowaggregation.cpp | 4 +- 4 files changed, 106 insertions(+), 130 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index ff490da5b..8f7755ad9 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -852,7 +852,6 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) if (ac->aggOp() == ROWAGG_UDAF) { UDAFColumn* udafc = dynamic_cast(ac); - if (udafc) { constAggDataVec.push_back( @@ -1097,8 +1096,9 @@ void TupleAggregateStep::prep1PhaseAggregate( vector functionVec; uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); + // For UDAF uint32_t projColsUDAFIndex = 0; - + UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function map avgFuncMap; @@ -1287,12 +1287,10 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { 
std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -1300,12 +1298,10 @@ void TupleAggregateStep::prep1PhaseAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, i)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -1474,8 +1470,6 @@ void TupleAggregateStep::prep1PhaseAggregate( throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } - pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); - // Return column oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(key); @@ -1677,8 +1671,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); // map key = column key, operation (enum), and UDAF pointer if UDAF. 
AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; set avgSet; + + // fOR udaf + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; uint32_t projColsUDAFIndex = 0; // for count column of average function @@ -1847,7 +1844,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; if (udafc) @@ -1857,12 +1854,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAgg)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -2142,6 +2137,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { + udafc = NULL; pUDAFFunc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); @@ -2150,10 +2146,21 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } if 
(find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -2473,26 +2480,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); } else { @@ -2904,7 +2892,10 @@ void TupleAggregateStep::prep2PhasesAggregate( vector > aggColVec; set avgSet; vector >& returnedColVec = jobInfo.returnedColVec; + // For UDAF uint32_t projColsUDAFIndex = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2947,7 +2938,6 @@ void TupleAggregateStep::prep2PhasesAggregate( uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns @@ -3084,12 +3074,10 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3098,10 +3086,9 @@ void 
TupleAggregateStep::prep2PhasesAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -3350,10 +3337,6 @@ void TupleAggregateStep::prep2PhasesAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { - // Keep a count of the parms after the first for any aggregate. - // These will be skipped and the count needs to be subtracted - // from where the aux column will be. - int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap; @@ -3369,6 +3352,8 @@ void TupleAggregateStep::prep2PhasesAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { uint32_t retKey = returnedColVec[i].first; @@ -3379,19 +3364,30 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_MULTI_PARM) { // Skip on UM: Extra parms for an aggregate have no work on the UM - ++multiParms; continue; } + // Is this a UDAF? use the function as part of the key. 
- - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; - + pUDAFFunc = NULL; + udafc = NULL; if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); @@ -3492,7 +3488,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep2PhasesAggregate: " << emsg << " oid=" @@ -3514,7 +3510,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3525,7 +3521,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.distinctColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3534,7 +3530,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + 
ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -3542,30 +3538,11 @@ void TupleAggregateStep::prep2PhasesAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3600,6 +3577,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnedColVec[i].second == AggregateColumn::AVG) avgFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // now fix the AVG function, locate the count(column) position @@ -3617,7 +3595,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size() - multiParms; + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3724,7 +3702,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector > aggColVec, aggNoDistColVec; set avgSet, avgDistSet; vector >& returnedColVec = jobInfo.returnedColVec; + // For UDAF uint32_t projColsUDAFIndex = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -3796,7 
+3777,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns @@ -3940,12 +3920,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3954,10 +3932,9 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -4201,32 +4178,33 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // associate the columns between the aggregate RGs on PM and UM without distinct aggregator // populated the returned columns { + int64_t multiParms = 0; + for (uint32_t idx = 0; idx < groupByPm.size(); idx++) { SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(idx, idx)); groupByUm.push_back(groupby); } - // Keep a count of the parms after the first for any aggregate. - // These will be skipped and the count needs to be subtracted - // from where the aux column will be. 
- int64_t multiParms = 0; for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) - { SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; - // UDAF support if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) { - // Multi-Parm is not used on the UM + // Skip on UM: Extra parms for an aggregate have no work on the UM ++multiParms; continue; } + if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); + if (!udafFuncCol) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + } funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, @@ -4273,6 +4251,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // These will be skipped and the count needs to be subtracted // from where the aux column will be. int64_t multiParms = 0; + projColsUDAFIndex = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. 
map avgFuncMap, avgDistFuncMap; @@ -4286,9 +4265,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { pUDAFFunc = NULL; + udafc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); @@ -4304,10 +4286,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -4436,7 +4429,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second - multiParms; + colUm = it->second; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); @@ -4460,7 +4453,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - colUm = it->second - multiParms; + colUm = it->second; if (aggOp == ROWAGG_SUM) { @@ -4528,7 +4521,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, 
args); cerr << "prep2PhasesDistinctAggregate: " << emsg << " oid=" @@ -4552,7 +4545,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -4561,7 +4554,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -4569,30 +4562,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. 
- funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4629,6 +4603,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -4646,7 +4621,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size() - multiParms; + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4706,7 +4681,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(5)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 5c1989d51..7ee6a775b 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4573,7 +4573,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) udafc->resultType(ct); } } - } catch (std::logic_error e) { diff --git a/utils/common/common.vpj b/utils/common/common.vpj index 69059884c..ea67e04ba 100755 --- a/utils/common/common.vpj +++ 
b/utils/common/common.vpj @@ -200,6 +200,7 @@ + @@ -208,6 +209,7 @@ Name="Header Files" Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index c1f5bbd63..043dcaac2 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -2015,13 +2015,13 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { - int32_t paramCount = fRGContext.getParameterCount(); + uint32_t paramCount = fRGContext.getParameterCount(); // The vector of parameters to be sent to the UDAF mcsv1sdk::ColumnDatum valsIn[paramCount]; uint32_t dataFlags[paramCount]; execplan::CalpontSystemCatalog::ColDataType colDataType; - for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + for (uint32_t i = 0; i < paramCount; ++i) { mcsv1sdk::ColumnDatum& datum = valsIn[i]; // Turn on NULL flags From bac8c2d43b1e623cfb4509fecd944824e1c4fb9f Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 15 May 2018 13:15:45 -0500 Subject: [PATCH 013/123] MCOL-1201 Modify docs. 
Fix group concat bug --- dbcon/mysql/ha_calpont_execplan.cpp | 1 + utils/udfsdk/docs/source/changelog.rst | 1 + .../docs/source/reference/ColumnDatum.rst | 6 ++-- .../docs/source/reference/MariaDBUDAF.rst | 2 +- .../udfsdk/docs/source/reference/UDAFMap.rst | 2 +- .../docs/source/reference/mcsv1Context.rst | 2 +- .../docs/source/reference/mcsv1_UDAF.rst | 36 ++++++++----------- utils/udfsdk/docs/source/usage/cmakelists.rst | 2 +- utils/udfsdk/docs/source/usage/compile.rst | 2 +- utils/udfsdk/docs/source/usage/headerfile.rst | 6 ++-- .../udfsdk/docs/source/usage/introduction.rst | 4 +-- utils/udfsdk/docs/source/usage/sourcefile.rst | 29 +++++++-------- utils/udfsdk/udfsdk.vpj | 33 +++++++++++++++++ 13 files changed, 75 insertions(+), 51 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 7ee6a775b..395d24404 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4165,6 +4165,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) rowCol->columnVec(selCols); (dynamic_cast(ac))->orderCols(orderCols); parm.reset(rowCol); + ac->aggParms().push_back(parm); if (gc->str_separator()) { diff --git a/utils/udfsdk/docs/source/changelog.rst b/utils/udfsdk/docs/source/changelog.rst index fcd93d54c..1a7c749f9 100644 --- a/utils/udfsdk/docs/source/changelog.rst +++ b/utils/udfsdk/docs/source/changelog.rst @@ -5,4 +5,5 @@ Version History | Version | Date | Changes | +=========+============+=============================+ | 1.1.0α | 2017-08-25 | - First alpha release | +| 1.2.0α | 2016-05-18 | - Add multi parm support | +---------+------------+-----------------------------+ diff --git a/utils/udfsdk/docs/source/reference/ColumnDatum.rst b/utils/udfsdk/docs/source/reference/ColumnDatum.rst index dd1006363..5304a2953 100644 --- a/utils/udfsdk/docs/source/reference/ColumnDatum.rst +++ b/utils/udfsdk/docs/source/reference/ColumnDatum.rst @@ -1,3 +1,5 @@ +.. 
_ColumnDatum: + ColumnDatum =========== @@ -13,7 +15,7 @@ Example for int data: int myint = valIn.cast(); -For multi-paramter aggregations (not available in Columnstore 1.1), the colsIn vector of next_value() contains the ordered set of row parameters. +For multi-paramter aggregations (not available in Columnstore 1.1), the colsIn array of next_value() contains the ordered set of row parameters. For char, varchar, text, varbinary and blob types, columnData will be std::string. @@ -59,7 +61,7 @@ The provided values are: * - SMALLINT - A signed two byte integer * - DECIMAL - - A Columnstore Decimal value. For Columnstore 1.1, this is stored in the smallest integer type field that will hold the required precision. + - A Columnstore Decimal value. This is stored in the smallest integer type field that will hold the required precision. * - MEDINT - A signed four byte integer * - INT diff --git a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst index 1f6fa7acb..d031705d8 100644 --- a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst +++ b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst @@ -13,7 +13,7 @@ The library placed in mysql/lib is the name you use in the SQL CREATE AGGREGATE CREATE AGGREGATE FUNCTION ssq returns REAL soname 'libudf_mysql.so'; -Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures. If writing your function for other engines, you must handle allocation and de-alloaction in :ref:`function_init ` and :ref:`function_deinit ` +Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures in other engines. 
If writing your function for other engines, you must handle allocation and de-allocation in :ref:`function_init ` and :ref:`function_deinit ` All of the MariaDB UDF and UDAF example functions are in a single source file named udfmysql.cpp and linked into libudf_mysql.so. diff --git a/utils/udfsdk/docs/source/reference/UDAFMap.rst b/utils/udfsdk/docs/source/reference/UDAFMap.rst index 48706bab3..d3cda63f4 100644 --- a/utils/udfsdk/docs/source/reference/UDAFMap.rst +++ b/utils/udfsdk/docs/source/reference/UDAFMap.rst @@ -3,7 +3,7 @@ UDAFMap ======= -The UDAFMap is where we tell the system about our function. For Columnstore 1.1, you must manually place your function into this map. +The UDAFMap is where we tell the system about our function. For Columnstore 1.2, you must manually place your function into this map. * open mcsv1_udaf.cpp * add your header to the #include list diff --git a/utils/udfsdk/docs/source/reference/mcsv1Context.rst b/utils/udfsdk/docs/source/reference/mcsv1Context.rst index 279220fb3..02adf57ab 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1Context.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1Context.rst @@ -150,7 +150,7 @@ Use these to determine the way your UDA(n)F was called .. c:function:: size_t getParameterCount() const; -:returns: the number of parameters to the function in the SQL query. Columnstore 1.1 only supports one parameter. +:returns: the number of parameters to the function in the SQL query. .. c:function:: bool isParamNull(int paramIdx); diff --git a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst index 73c8f6570..f75fe73fc 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst @@ -1,4 +1,4 @@ -.. _ mcsv1_udaf: +.. _mcsv1_udaf: mcsv1_UDAF ========== @@ -11,12 +11,14 @@ The base class has no data members. It is designed to be only a container for yo However, adding static const members makes sense. 
-For UDAF (not Wndow Functions) Aggregation takes place in three stages: +For UDAF (not Window Functions) Aggregation takes place in three stages: * Subaggregation on the PM. nextValue() * Consolodation on the UM. subevaluate() * Evaluation of the function on the UM. evaluate() +There are situations where the system makes a choice to perform all UDAF calculations on the UM. The presence of group_concat() in the query and certain joins can cause the optimizer to make this choice. + For Window Functions, all aggregation occurs on the UM, and thus the subevaluate step is skipped. There is an optional dropValue() function that may be added. * Aggregation on the UM. nextValue() @@ -80,17 +82,11 @@ Callback Methods .. _init: -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. - - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. +:param colTypes: A list of ColumnDatum structures. Use this to access the column types of the parameters. colTypes.columnData will be invalid. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -116,25 +112,23 @@ Callback Methods .. _nextvalue: -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. - +:param valsIn: an array representing the values to be added for each parameter for this row. 
+ :returns: ReturnCode::ERROR or ReturnCode::SUCCESS Use context->getUserData() and type cast it to your UserData type or Simple Data Model stuct. nextValue() is called for each Window movement that passes the WHERE and HAVING clauses. The context's UserData will contain values that have been sub-aggregated to this point for the group, partition or Window Frame. nextValue is called on the PM for aggregation and on the UM for Window Functions. - When used in an aggregate, the function may not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. + When used in an aggregate, the function should not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. - When used as a analytic function (Window Function), nextValue is call for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. + When used as an analytic function (Window Function), nextValue is called for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. - Since this is called for every row, it is important that this method be efficient. + Since this may be called for every row, it is important that this method be efficient. .. _subevaluate: @@ -172,13 +166,11 @@ Callback Methods .. _dropvalue: -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call -:param valsDropped: a vector representing the values to be dropped for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. 
+:param valsDropped: an array representing the values to be dropped for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS diff --git a/utils/udfsdk/docs/source/usage/cmakelists.rst b/utils/udfsdk/docs/source/usage/cmakelists.rst index 32a218459..a7ddacbaf 100644 --- a/utils/udfsdk/docs/source/usage/cmakelists.rst +++ b/utils/udfsdk/docs/source/usage/cmakelists.rst @@ -3,7 +3,7 @@ CMakeLists.txt ============== -For Columnstore 1.1, you compile your function by including it in the CMakeLists.txt file for the udfsdk. +For Columnstore 1.2, you compile your function by including it in the CMakeLists.txt file for the udfsdk. You need only add the new .cpp files to the udfsdk_LIB_SRCS target list:: diff --git a/utils/udfsdk/docs/source/usage/compile.rst b/utils/udfsdk/docs/source/usage/compile.rst index e6319e45b..b96af5d80 100644 --- a/utils/udfsdk/docs/source/usage/compile.rst +++ b/utils/udfsdk/docs/source/usage/compile.rst @@ -3,7 +3,7 @@ Compile ======= -To compile your function for Columnstore 1.1, simple recompile the udfsdk directory:: +To compile your function for Columnstore 1.2, simply recompile the udfsdk directory:: cd utils/usdsdk cmake . diff --git a/utils/udfsdk/docs/source/usage/headerfile.rst b/utils/udfsdk/docs/source/usage/headerfile.rst index 720acc5be..afb043e98 100644 --- a/utils/udfsdk/docs/source/usage/headerfile.rst +++ b/utils/udfsdk/docs/source/usage/headerfile.rst @@ -5,7 +5,7 @@ Header file Usually, each UDA(n)F function will have one .h and one .cpp file plus code for the mariadb UDAF plugin which may or may not be in a separate file. It is acceptable to put a set of related functions in the same files or use separate files for each. -The easiest way to create these files is to copy them an example closest to the type of function you intend to create. +The easiest way to create these files is to copy them from an example closest to the type of function you intend to create. 
Your header file must have a class defined that will implement your function. This class must be derived from mcsv1_UDAF and be in the mcsv1sdk namespace. The following examples use the "allnull" UDAF. @@ -29,9 +29,9 @@ allnull uses the Simple Data Model. See :ref:`complexdatamodel` to see how that allnull() : mcsv1_UDAF(){}; virtual ~allnull(){}; - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); virtual ReturnCode reset(mcsv1Context* context); - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn); virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); }; diff --git a/utils/udfsdk/docs/source/usage/introduction.rst b/utils/udfsdk/docs/source/usage/introduction.rst index 6b3544a1e..19c612caa 100644 --- a/utils/udfsdk/docs/source/usage/introduction.rst +++ b/utils/udfsdk/docs/source/usage/introduction.rst @@ -3,7 +3,7 @@ mcsv1_udaf Introduction mcsv1_udaf is a C++ API for writing User Defined Aggregate Functions (UDAF) and User Defined Analytic Functions (UDAnF) for the MariaDB Columstore engine. -In Columnstore 1.1.0, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. +In Columnstore 1.2, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. The API has a number of features. The general theme is, there is a class that represents the function, there is a context under which the function operates, and there is a data store for intermediate values. @@ -18,5 +18,5 @@ The steps required to create a function are: * :ref:`Compile udfsdk `. * :ref:`Copy the compiled libraries ` to the working directories. 
-In 1.1.0, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. +In 1.2, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. diff --git a/utils/udfsdk/docs/source/usage/sourcefile.rst b/utils/udfsdk/docs/source/usage/sourcefile.rst index b7ed38a32..5c43f29e4 100644 --- a/utils/udfsdk/docs/source/usage/sourcefile.rst +++ b/utils/udfsdk/docs/source/usage/sourcefile.rst @@ -34,21 +34,17 @@ Or, if using the :ref:`complexdatamodel`, type cast the UserData to your UserDat init() ------ -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. +:param colTypes: A list of the ColumnDatum used to access column types of the parameters. In init(), the columnData member is invalid. - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - see :ref:`ColDataTypes `. In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. + see :ref:`ColumnDatum`. In Columnstore 1.2, an arbitrary number of parameters is supported. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS -The init() method is where you sanity check the input, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. +The init() method is where you sanity check the input datatypes, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. init() is the exception to type casting the UserData member of context. 
UserData has not been created when init() is called, so you shouldn't use it here. @@ -60,13 +56,14 @@ If you're using :ref:`simpledatamodel`, you need to set the size of the structur .. rubric:: Check parameter count and type -Each function expects a certain number of columns to entered as parameters in the SQL query. For columnstore 1.1, the number of parameters is limited to one. +Each function expects a certain number of columns to be entered as parameters in the SQL query. It is possible to create a UDAF that accepts a variable number of parameters. You can discover which ones were actually used in init(), and modify your function's behavior accordingly. -colTypes is a vector of each parameter name and type. The name is the colum name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your functions behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +colTypes is an array of ColumnDatum from which can be gleaned the type and name. The name is the column name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your function's behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +The actual number of parameters passed can be gotten from context->getParameterCount(). :: - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -84,7 +81,7 @@ When you create your function using the SQL CREATE FUNCTION command, you must in .. 
rubric:: Set width and scale -If you have secial requirements, especially if you might be dealing with decimal types:: +If you have special requirements, especially if you might be dealing with decimal types:: context->setColWidth(8); context->setScale(context->getScale()*2); @@ -117,13 +114,11 @@ This function may be called multiple times from both the UM and the PM. Make no nextValue() ----------- -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. +:param valsIn: an array representing the values to be added for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -208,7 +203,7 @@ For AVG, you might see:: dropValue --------- -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index fe1f3fd0e..3d3ac39ca 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -238,5 +238,38 @@ N="Makefile" Type="Makefile"/> + + + + + + + + + + + + + + + + + + + + + + + + + + + From 8f3faee25dd94ef95b126791d319da4cb007980d Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Wed, 23 May 2018 23:38:11 +0300 Subject: [PATCH 014/123] MCOL-1406 Fixed the regression. 
--- dbcon/ddlpackage/ddl.l | 4 ++-- dbcon/ddlpackage/ddl.y | 36 ++++++++++++++++++++---------------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index f65ef161d..34d80e902 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -86,7 +86,7 @@ realfail2 ({integer}|{decimal})[Ee][-+] {identifier_quoted} { ddlget_lval(yyscanner)->str = scanner_copy( ddlget_text(yyscanner), yyscanner, STRIP_QUOTES ); return IDENT; } -{identifier_double_quoted} { ddlget_lval(yyscanner)->str = scanner_copy( ddlget_text(yyscanner), yyscanner, STRIP_QUOTES ); return IDENT; } +{identifier_double_quoted} { ddlget_lval(yyscanner)->str = scanner_copy( ddlget_text(yyscanner), yyscanner, STRIP_QUOTES ); return DQ_IDENT; } {fq_identifier} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner); return FQ_IDENT; } {fq_quoted} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner, STRIP_QUOTES_FQ); return FQ_IDENT; } {fq_double_quoted} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner, STRIP_QUOTES_FQ); return FQ_IDENT; } @@ -188,7 +188,7 @@ LONGTEXT {return LONGTEXT;} /* ignore */ } -{identifier} {ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner); return IDENT;} +{identifier} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner); return DQ_IDENT;} {self} { return ddlget_text(yyscanner)[0]; diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index 398a8612f..644cb9d9e 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -29,20 +29,13 @@ Understanding the New Sql book The postgress and mysql sources. find x -name \*.y -o -name \*.yy. - We don't support delimited identifiers. + We support quoted identifiers. All literals are stored as unconverted strings. You can't say "NOT DEFERRABLE". See the comment below. - This is not a reentrant parser. 
It uses the original global - variable style method of communication between the parser and - scanner. If we ever needed more than one parser thread per - processes, we would use the pure/reentrant options of bison and - flex. In that model, things that are traditionally global live - inside a struct that is passed around. We would need to upgrade to - a more recent version of flex. At the time of this writing, our - development systems have: flex version 2.5.4 + This is a reentrant parser. MCOL-66 Modify to be a reentrant parser */ @@ -122,7 +115,7 @@ REFERENCES RENAME RESTRICT SET SMALLINT TABLE TEXT TIME TINYBLOB TINYTEXT TINYINT TO UNIQUE UNSIGNED UPDATE USER SESSION_USER SYSTEM_USER VARCHAR VARBINARY VARYING WITH ZONE DOUBLE IDB_FLOAT REAL CHARSET IDB_IF EXISTS CHANGE TRUNCATE -%token FQ_IDENT IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE +%token DQ_IDENT FQ_IDENT IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE /* Notes: * 1. "ata" stands for alter_table_action @@ -206,6 +199,8 @@ VARYING WITH ZONE DOUBLE IDB_FLOAT REAL CHARSET IDB_IF EXISTS CHANGE TRUNCATE %type opt_if_exists %type opt_if_not_exists %type trunc_table_statement +%type rename_table_statement +%type ident %% stmtblock: stmtmulti { x->fParseTree = $1; } @@ -465,7 +460,7 @@ opt_equal: ; table_option: - ENGINE opt_equal IDENT {$$ = new pair("engine", $3);} + ENGINE opt_equal ident {$$ = new pair("engine", $3);} | MAX_ROWS opt_equal ICONST {$$ = new pair("max_rows", $3);} | @@ -480,9 +475,9 @@ table_option: $$ = new pair("auto_increment", $3); } | - DEFAULT CHARSET opt_equal IDENT {$$ = new pair("default charset", $4);} + DEFAULT CHARSET opt_equal ident {$$ = new pair("default charset", $4);} | - DEFAULT IDB_CHAR SET opt_equal IDENT {$$ = new pair("default charset", $5);} + DEFAULT IDB_CHAR SET opt_equal ident {$$ = new pair("default charset", $5);} ; alter_table_statement: @@ -625,7 +620,7 @@ qualified_name: else $$ = new QualifiedName($1); } - | IDENT { + | ident { if 
(x->fDBSchema.size()) $$ = new QualifiedName((char*)x->fDBSchema.c_str(), $1); else @@ -637,6 +632,11 @@ qualified_name: } ; +ident: + DQ_IDENT + | IDENT + ; + ata_add_column: /* See the documentation for SchemaObject for an explanation of why we are using * dynamic_cast here. @@ -649,11 +649,11 @@ ata_add_column: column_name: DATE - |IDENT + |ident ; constraint_name: - IDENT + ident ; column_option: @@ -713,6 +713,10 @@ default_clause: { $$ = new ColumnDefaultValue($2); } + | DEFAULT DQ_IDENT /* MCOL-1406 */ + { + $$ = new ColumnDefaultValue($2); + } | DEFAULT NULL_TOK {$$ = new ColumnDefaultValue(NULL);} | DEFAULT USER {$$ = new ColumnDefaultValue("$USER");} | DEFAULT CURRENT_USER {$$ = new ColumnDefaultValue("$CURRENT_USER");} From 04c87aca31945112769b5adade11ef1e305d4f17 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Fri, 25 May 2018 12:28:00 +0300 Subject: [PATCH 015/123] MCOL-1406 Removed unused non-terminal token. --- dbcon/ddlpackage/ddl.y | 1 - 1 file changed, 1 deletion(-) diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index 644cb9d9e..cf5893773 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -199,7 +199,6 @@ VARYING WITH ZONE DOUBLE IDB_FLOAT REAL CHARSET IDB_IF EXISTS CHANGE TRUNCATE %type opt_if_exists %type opt_if_not_exists %type trunc_table_statement -%type rename_table_statement %type ident %% From 0f617896d9b6fd82ef23dd5901133f1661e19866 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 25 May 2018 12:56:29 -0500 Subject: [PATCH 016/123] MCOL-1201 Add support for UDAF multiple parm constants --- dbcon/execplan/constantcolumn.h | 2 + dbcon/joblist/jlf_common.cpp | 2 +- dbcon/joblist/joblistfactory.cpp | 10 +- dbcon/joblist/tupleaggregatestep.cpp | 106 +++++-- dbcon/joblist/windowfunctionstep.cpp | 9 +- dbcon/mysql/ha_calpont_execplan.cpp | 38 ++- .../primproc/batchprimitiveprocessor.cpp | 16 +- utils/common/any.hpp | 7 +- utils/loggingcpp/errorcodes.cpp | 2 +- utils/messageqcpp/bytestream.h | 1 + 
utils/rowgroup/rowaggregation.cpp | 198 +++++++++---- utils/rowgroup/rowaggregation.h | 36 ++- utils/udfsdk/allnull.h | 1 - utils/udfsdk/avg_mode.h | 1 - utils/udfsdk/avgx.h | 1 - utils/udfsdk/mcsv1_udaf.h | 1 - utils/udfsdk/median.h | 1 - utils/udfsdk/regr_avgx.cpp | 6 +- utils/udfsdk/regr_avgx.h | 1 - utils/udfsdk/ssq.h | 1 - utils/udfsdk/udfsdk.vpj | 33 --- utils/windowfunction/wf_udaf.cpp | 276 +++++++++++------- utils/windowfunction/wf_udaf.h | 2 - utils/windowfunction/windowfunctiontype.cpp | 24 +- utils/windowfunction/windowfunctiontype.h | 7 +- 25 files changed, 508 insertions(+), 274 deletions(-) diff --git a/dbcon/execplan/constantcolumn.h b/dbcon/execplan/constantcolumn.h index 04098faae..be0731044 100644 --- a/dbcon/execplan/constantcolumn.h +++ b/dbcon/execplan/constantcolumn.h @@ -38,6 +38,8 @@ class ByteStream; */ namespace execplan { +class ConstantColumn; + /** * @brief A class to represent a constant return column * diff --git a/dbcon/joblist/jlf_common.cpp b/dbcon/joblist/jlf_common.cpp index f5dbeee17..4b1980d49 100644 --- a/dbcon/joblist/jlf_common.cpp +++ b/dbcon/joblist/jlf_common.cpp @@ -405,7 +405,7 @@ uint32_t getTupleKey(JobInfo& jobInfo, const SRCP& srcp, bool add) if (add) { - // setTupleInfo first if add is ture, ok if already set. + // setTupleInfo first if add is true, ok if already set. 
const SimpleColumn* sc = dynamic_cast(srcp.get()); if (sc != NULL) diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 4cf7bccc5..033bf2643 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -300,6 +300,7 @@ const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo) { const ArithmeticColumn* ac = NULL; const FunctionColumn* fc = NULL; + const ConstantColumn* cc = NULL; uint64_t eid = -1; CalpontSystemCatalog::ColType ct; ExpressionStep* es = new ExpressionStep(jobInfo); @@ -316,6 +317,11 @@ const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo) eid = fc->expressionId(); ct = fc->resultType(); } + else if ((cc = dynamic_cast(retCols[i].get())) != NULL) + { + eid = cc->expressionId(); + ct = cc->resultType(); + } else { std::ostringstream errmsg; @@ -1004,7 +1010,9 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo for (uint32_t parm = 0; parm < aggParms.size(); ++parm) { - if (aggc->constCol().get() != NULL) + // Only do the optimization of converting to count(*) if + // there is only one parameter. 
+ if (aggParms.size() == 1 && aggc->constCol().get() != NULL) { // replace the aggregate on constant with a count(*) SRCP clone; diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 8f7755ad9..491f86a8f 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -1097,7 +1097,8 @@ void TupleAggregateStep::prep1PhaseAggregate( uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); // For UDAF - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function @@ -1286,11 +1287,11 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -1477,6 +1478,14 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(udafFuncCol->fUDAFContext.getPrecision()); typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -1488,6 +1497,13 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = 
udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -1676,7 +1692,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // fOR udaf UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; // for count column of average function map avgFuncMap, avgDistFuncMap; @@ -1840,12 +1857,12 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { @@ -2071,6 +2088,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(CalpontSystemCatalog::UBIGINT); widthAgg.push_back(sizeof(uint64_t)); funct->fAuxColumnIndex = colAgg++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -2083,6 +2108,13 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(widthProj[colProj]); ++colAgg; + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -2133,7 +2165,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - projColsUDAFIndex = 0; + projColsUDAFIdx = 0; // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2146,11 +2178,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( 
if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -2893,7 +2925,8 @@ void TupleAggregateStep::prep2PhasesAggregate( set avgSet; vector >& returnedColVec = jobInfo.returnedColVec; // For UDAF - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; @@ -3073,11 +3106,11 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3305,6 +3338,14 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); widthAggPm.push_back(bigUintWidth); funct->fAuxColumnIndex = colAggPm++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -3317,6 +3358,13 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); colAggPm++; + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -3342,7 +3390,7 @@ void TupleAggregateStep::prep2PhasesAggregate( map 
avgFuncMap; AGG_MAP aggDupFuncMap; - projColsUDAFIndex = 0; + projColsUDAFIdx = 0; // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3372,12 +3420,12 @@ void TupleAggregateStep::prep2PhasesAggregate( udafc = NULL; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3703,7 +3751,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( set avgSet, avgDistSet; vector >& returnedColVec = jobInfo.returnedColVec; // For UDAF - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; @@ -3919,11 +3968,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -4147,6 +4196,14 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); widthAggPm.push_back(sizeof(uint64_t)); funct->fAuxColumnIndex = colAggPm++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -4160,6 +4217,13 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( 
widthAggPm.push_back(width[colProj]); multiParmIndexes.push_back(colAggPm); colAggPm++; + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -4251,7 +4315,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // These will be skipped and the count needs to be subtracted // from where the aux column will be. int64_t multiParms = 0; - projColsUDAFIndex = 0; + projColsUDAFIdx = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4286,11 +4350,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); diff --git a/dbcon/joblist/windowfunctionstep.cpp b/dbcon/joblist/windowfunctionstep.cpp index 4d24f0b4b..2a93f680b 100644 --- a/dbcon/joblist/windowfunctionstep.cpp +++ b/dbcon/joblist/windowfunctionstep.cpp @@ -569,6 +569,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) for (RetColsVector::iterator i = jobInfo.windowCols.begin(); i < jobInfo.windowCols.end(); i++) { + bool isUDAF = false; // window function type WindowFunctionColumn* wc = dynamic_cast(i->get()); uint64_t ridx = getColumnIndex(*i, colIndexMap, jobInfo); // result index @@ -590,6 +591,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // if (boost::iequals(wc->functionName(),"UDAF_FUNC") if (wc->functionName() == "UDAF_FUNC") { + isUDAF = true; ++wfsUserFunctionCount; } @@ -646,10 +648,13 @@ void 
WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // column type for functor templates int ct = 0; + if (isUDAF) + { + ct = wc->getUDAFContext().getResultType(); + } // make sure index is in range - if (fields.size() > 1 && fields[1] >= 0 && static_cast(fields[1]) < types.size()) + else if (fields.size() > 1 && fields[1] >= 0 && static_cast(fields[1]) < types.size()) ct = types[fields[1]]; - // workaround for functions using "within group (order by)" syntax string fn = boost::to_upper_copy(wc->functionName()); diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 701e1c14f..b02712409 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4206,8 +4206,8 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // treat as count(*) if (ac->aggOp() == AggregateColumn::COUNT) ac->aggOp(AggregateColumn::COUNT_ASTERISK); - - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + parm.reset(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError)); + ac->constCol(parm); break; } @@ -4485,17 +4485,20 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will // be applied in ExeMgr. When the ExeMgr fix is available, this checking // will be taken out. 
- if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + if (isp->sum_func() != Item_sum::UDF_SUM_FUNC) { - gwi.fatalParseError = true; - gwi.parseErrorText = "No project column found for aggregate function"; - if (ac) - delete ac; - return NULL; - } - else if (ac->constCol()) - { - gwi.count_asterisk_list.push_back(ac); + if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "No project column found for aggregate function"; + if (ac) + delete ac; + return NULL; + } + else if (ac->constCol()) + { + gwi.count_asterisk_list.push_back(ac); + } } // For UDAF, populate the context and call the UDAF init() function. @@ -7903,8 +7906,15 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i setError(gwi.thd, ER_INTERNAL_ERROR, gwi.parseErrorText, gwi); return ER_CHECK_NOT_IMPLEMENTED; } - - (*coliter)->aggParms().push_back(minSc); + // Replace the last (presumably constant) object with minSc + if ((*coliter)->aggParms().empty()) + { + (*coliter)->aggParms().push_back(minSc); + } + else + { + (*coliter)->aggParms()[0] = minSc; + } } std::vector::iterator funciter; diff --git a/primitives/primproc/batchprimitiveprocessor.cpp b/primitives/primproc/batchprimitiveprocessor.cpp index bc56a7430..019761d39 100644 --- a/primitives/primproc/batchprimitiveprocessor.cpp +++ b/primitives/primproc/batchprimitiveprocessor.cpp @@ -1677,15 +1677,11 @@ void BatchPrimitiveProcessor::execute() } catch (logging::QueryDataExcept& qex) { - ostringstream os; - os << qex.what() << endl; - writeErrorMsg(os.str(), qex.errorCode()); + writeErrorMsg(qex.what(), qex.errorCode()); } catch (logging::DictionaryBufferOverflow& db) { - ostringstream os; - os << db.what() << endl; - writeErrorMsg(os.str(), db.errorCode()); + writeErrorMsg(db.what(), db.errorCode()); } catch (scalar_exception& se) { @@ -1758,15 +1754,11 @@ void BatchPrimitiveProcessor::execute() } catch (IDBExcept& iex) { 
- ostringstream os; - os << iex.what() << endl; - writeErrorMsg(os.str(), iex.errorCode(), true, false); + writeErrorMsg(iex.what(), iex.errorCode(), true, false); } catch (const std::exception& ex) { - ostringstream os; - os << ex.what() << endl; - writeErrorMsg(os.str(), logging::batchPrimitiveProcessorErr); + writeErrorMsg(ex.what(), logging::batchPrimitiveProcessorErr); } catch (...) { diff --git a/utils/common/any.hpp b/utils/common/any.hpp index 5408c5c87..63d05d3d2 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -11,15 +11,12 @@ #include #include +#include namespace static_any { namespace anyimpl { - struct bad_any_cast - { - }; - struct empty_any { }; @@ -266,7 +263,7 @@ public: T& cast() { if (policy != anyimpl::get_policy()) - throw anyimpl::bad_any_cast(); + throw std::runtime_error("static_any: type mismatch in cast"); T* r = reinterpret_cast(policy->get_value(&object)); return *r; } diff --git a/utils/loggingcpp/errorcodes.cpp b/utils/loggingcpp/errorcodes.cpp index 60919c906..4b4196800 100644 --- a/utils/loggingcpp/errorcodes.cpp +++ b/utils/loggingcpp/errorcodes.cpp @@ -29,7 +29,7 @@ using namespace std; namespace logging { -ErrorCodes::ErrorCodes(): fErrorCodes(), fPreamble("An unexpected condition within the query caused an internal processing error within InfiniDB. Please check the log files for more details. Additional Information: ") +ErrorCodes::ErrorCodes(): fErrorCodes(), fPreamble("An unexpected condition within the query caused an internal processing error within Columnstore. Please check the log files for more details. 
Additional Information: ") { fErrorCodes[batchPrimitiveStepErr] = "error in BatchPrimitiveStep."; fErrorCodes[tupleBPSErr] = "error in TupleBPS."; diff --git a/utils/messageqcpp/bytestream.h b/utils/messageqcpp/bytestream.h index d1a3f4988..f8453843e 100644 --- a/utils/messageqcpp/bytestream.h +++ b/utils/messageqcpp/bytestream.h @@ -35,6 +35,7 @@ #include "exceptclasses.h" #include "serializeable.h" +#include "any.hpp" class ByteStreamTestSuite; diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 043dcaac2..6339554f1 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -1723,17 +1723,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(pFunctionCol.get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF, i); - } - else - { - throw logic_error("(3)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colOut + 1, i); break; } @@ -2012,31 +2002,60 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu fRow.setLongDoubleField(fRow.getLongDoubleField(colAux + 1) + valIn * valIn, colAux + 1); } -void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) +void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, + int64_t colAux, uint64_t& funcColsIdx) { uint32_t paramCount = fRGContext.getParameterCount(); // The vector of parameters to be sent to the UDAF mcsv1sdk::ColumnDatum valsIn[paramCount]; uint32_t dataFlags[paramCount]; - + ConstantColumn* cc; + bool bIsNull = false; execplan::CalpontSystemCatalog::ColDataType colDataType; for (uint32_t i = 0; i < paramCount; ++i) { + // If UDAF_IGNORE_NULLS is on, bIsNull gets set the first time + // we find a null. 
We still need to eat the rest of the parameters + // to sync updateEntry + if (bIsNull) + { + ++funcColsIdx; + continue; + } + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; // Turn on NULL flags dataFlags[i] = 0; - if (isNull(&fRowGroupIn, rowIn, colIn) == true) + + // If this particular parameter is a constant, then we need + // to acces the constant value rather than a row value. + cc = NULL; + if (pFunctionCol->fpConstCol) + { + cc = dynamic_cast(pFunctionCol->fpConstCol.get()); + } + + if ((cc && cc->type() == ConstantColumn::NULLDATA) + || (!cc && isNull(&fRowGroupIn, rowIn, colIn) == true)) { if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { - return; + bIsNull = true; + ++funcColsIdx; + continue; } dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; } - - colDataType = fRowGroupIn.getColTypes()[colIn]; - if (!fRGContext.isParamNull(i)) + + if (cc) + { + colDataType = cc->resultType().colDataType; + } + else + { + colDataType = fRowGroupIn.getColTypes()[colIn]; + } + if (!(dataFlags[i] & mcsv1sdk::PARAM_IS_NULL)) { switch (colDataType) { @@ -2045,13 +2064,38 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::MEDINT: case execplan::CalpontSystemCatalog::INT: case execplan::CalpontSystemCatalog::BIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + if (cc) + { + datum.columnData = cc->getIntVal(const_cast(rowIn), bIsNull); + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + } + break; + } case execplan::CalpontSystemCatalog::DECIMAL: case execplan::CalpontSystemCatalog::UDECIMAL: { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - datum.scale = 
fRowGroupIn.getScale()[colIn]; - datum.precision = fRowGroupIn.getPrecision()[colIn]; + datum.dataType = colDataType; + if (cc) + { + datum.columnData = cc->getDecimalVal(const_cast(rowIn), bIsNull).value; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + } break; } @@ -2062,7 +2106,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::UBIGINT: { datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); + if (cc) + { + datum.columnData = cc->getUintVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } break; } @@ -2070,7 +2121,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::UDOUBLE: { datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; - datum.columnData = rowIn.getDoubleField(colIn); + if (cc) + { + datum.columnData = cc->getDoubleVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getDoubleField(colIn); + } break; } @@ -2078,22 +2136,55 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::UFLOAT: { datum.dataType = execplan::CalpontSystemCatalog::FLOAT; - datum.columnData = rowIn.getFloatField(colIn); + if (cc) + { + datum.columnData = cc->getFloatVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getFloatField(colIn); + } break; } case execplan::CalpontSystemCatalog::DATE: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + if (cc) + { + datum.columnData = cc->getDateIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } + break; + } case 
execplan::CalpontSystemCatalog::DATETIME: { datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); + if (cc) + { + datum.columnData = cc->getDatetimeIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } break; } case execplan::CalpontSystemCatalog::TIME: { datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); + if (cc) + { + datum.columnData = cc->getTimeIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getIntField(colIn); + } break; } @@ -2105,7 +2196,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::BLOB: { datum.dataType = colDataType; - datum.columnData = rowIn.getStringField(colIn); + if (cc) + { + datum.columnData = cc->getStrVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getStringField(colIn); + } break; } @@ -2147,6 +2245,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColsIdx].get()); rowUDAF->bInterrupted = true; throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } @@ -2443,17 +2542,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); - } - else - { - throw logic_error("(5)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } @@ -3991,17 +4080,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); - } - else 
- { - throw logic_error("(6)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } @@ -4199,8 +4278,8 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // colAux(in) - Where the UDAF userdata resides // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance //------------------------------------------------------------------------------ -void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) +void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, + int64_t colAux, uint64_t& funcColsIdx) { static_any::any valOut; @@ -4235,6 +4314,7 @@ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColsIdx].get()); rowUDAF->bInterrupted = true; throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } @@ -4429,17 +4509,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); - } - else - { - throw logic_error("(7)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index 282f354fc..14e4313cf 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -50,6 +50,7 @@ #include "stlpoolallocator.h" #include "returnedcolumn.h" #include "mcsv1_udaf.h" +#include "constantcolumn.h" // To do: move code that depends on joblist to a proper subsystem. namespace joblist @@ -200,6 +201,13 @@ struct RowAggFunctionCol // 4. 
for duplicate - point to the real aggretate column to be copied from // Set only on UM, the fAuxColumnIndex is defaulted to fOutputColumnIndex+1 on PM. uint32_t fAuxColumnIndex; + + // For UDAF that have more than one parameter and some parameters are constant. + // There will be a series of RowAggFunctionCol created, one for each parameter. + // The first will be a RowUDAFFunctionCol. Subsequent ones will be RowAggFunctionCol + // with fAggFunction == ROWAGG_MULTI_PARM. Order is important. + // If this parameter is constant, that value is here. + SRCP fpConstCol; }; @@ -220,8 +228,11 @@ struct RowUDAFFunctionCol : public RowAggFunctionCol inputColIndex, outputColIndex, auxColIndex), bInterrupted(false) {} - RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, - rhs.fInputColumnIndex, rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), fUDAFContext(rhs.fUDAFContext) + RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : + RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, rhs.fInputColumnIndex, + rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), + fUDAFContext(rhs.fUDAFContext), + bInterrupted(false) {} virtual ~RowUDAFFunctionCol() {} @@ -238,6 +249,16 @@ inline void RowAggFunctionCol::serialize(messageqcpp::ByteStream& bs) const bs << (uint8_t)fAggFunction; bs << fInputColumnIndex; bs << fOutputColumnIndex; + if (fpConstCol) + { + bs << (uint8_t)1; + fpConstCol.get()->serialize(bs); + } + else + { + bs << (uint8_t)0; + } + } inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) @@ -245,6 +266,13 @@ inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) bs >> (uint8_t&)fAggFunction; bs >> fInputColumnIndex; bs >> fOutputColumnIndex; + uint8_t t; + bs >> t; + if (t) + { + fpConstCol.reset(new ConstantColumn); + fpConstCol.get()->unserialize(bs); + } } inline void RowUDAFFunctionCol::serialize(messageqcpp::ByteStream& bs) const @@ -586,7 +614,7 @@ protected: virtual void doAvg(const 
Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); - virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); @@ -902,7 +930,7 @@ protected: void doStatistics(const Row&, int64_t, int64_t, int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); - void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); + void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx); bool countSpecial(const RowGroup* pRG) { return false; diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h index da17f5d6b..6a727caf6 100644 --- a/utils/udfsdk/allnull.h +++ b/utils/udfsdk/allnull.h @@ -48,7 +48,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/avg_mode.h b/utils/udfsdk/avg_mode.h index 5722c5fea..fba1fcdcc 100644 --- a/utils/udfsdk/avg_mode.h +++ b/utils/udfsdk/avg_mode.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/avgx.h b/utils/udfsdk/avgx.h index 0569b6091..a830c6803 100644 --- a/utils/udfsdk/avgx.h +++ b/utils/udfsdk/avgx.h @@ -35,7 +35,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h index df3f47649..e09228d77 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -68,7 +68,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h index 142be6ba8..48bd93c70 100644 --- a/utils/udfsdk/median.h +++ 
b/utils/udfsdk/median.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/udfsdk/regr_avgx.cpp index c7cc5b56e..aec4f361f 100644 --- a/utils/udfsdk/regr_avgx.cpp +++ b/utils/udfsdk/regr_avgx.cpp @@ -82,7 +82,7 @@ mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* { return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. } - if (valIn_x.empty() || valIn_y.empty()) + if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. Probably redundant { return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. } @@ -107,10 +107,6 @@ mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* { val = valIn_x.cast(); } - else if (valIn_x.compatible(longTypeId)) - { - val = valIn_x.cast(); - } else if (valIn_x.compatible(llTypeId)) { val = valIn_x.cast(); diff --git a/utils/udfsdk/regr_avgx.h b/utils/udfsdk/regr_avgx.h index f70f30d8c..27b8708f7 100644 --- a/utils/udfsdk/regr_avgx.h +++ b/utils/udfsdk/regr_avgx.h @@ -35,7 +35,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h index 2cac61c2c..e27ecf1fa 100644 --- a/utils/udfsdk/ssq.h +++ b/utils/udfsdk/ssq.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index 3d3ac39ca..fe1f3fd0e 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -238,38 +238,5 @@ N="Makefile" Type="Makefile"/> - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index 5cd5243c5..2876fbf7e 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -451,7 +451,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { mcsv1sdk::mcsv1_UDAF::ReturnCode rc; uint64_t colOut = 
fFieldIndex[0]; - + bool isNull = false; if ((fFrameUnit == WF__FRAME_ROWS) || (fPrev == -1) || (!fPeer->operator()(getPointer(fRowData->at(c)), getPointer(fRowData->at(fPrev))))) @@ -468,13 +468,24 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Put the parameter metadata (type, scale, precision) into valsIn mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + ConstantColumn* cc = NULL; for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) { - uint64_t colIn = fFieldIndex[i+1]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colIn); + cc = static_cast(fConstantParms[i].get()); + if (cc) + { + datum.dataType = cc->resultType().colDataType; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + uint64_t colIn = fFieldIndex[i+1]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } } if (b <= c && c <= e) @@ -494,12 +505,14 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) uint32_t flags[getContext().getParameterCount()]; for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { + cc = static_cast(fConstantParms[k].get()); uint64_t colIn = fFieldIndex[k+1]; mcsv1sdk::ColumnDatum& datum = valsIn[k]; // Turn on Null flags or skip based on respect nulls flags[k] = 0; - if (fRow.isNullValue(colIn) == true) + if ((!cc && fRow.isNullValue(colIn) == true) + || (cc && cc->type() == ConstantColumn::NULLDATA)) { if (!bRespectNulls) { @@ -510,133 +523,196 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - // MCOL-1201 Multi-Paramter calls - switch (datum.dataType) + if (!bHasNull && !(flags[k] & mcsv1sdk::PARAM_IS_NULL)) { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: 
- case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: + switch (datum.dataType) { - int64_t valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + int64_t valIn; + if (cc) { - continue; + valIn = cc->getIntVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - case CalpontSystemCatalog::UDECIMAL: - { - uint64_t valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + int64_t valIn; + if (cc) { - continue; + valIn = cc->getDecimalVal(fRow, isNull).value; } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - { - double valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + uint64_t valIn; + if (cc) { - continue; + valIn = cc->getUintVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - { - float valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + double valIn; + if (cc) { - continue; + valIn = cc->getDoubleVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::VARBINARY: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::BLOB: - { - string valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + float valIn; + if (cc) { - continue; + valIn = cc->getFloatVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - default: - { - string errStr = "(" + colType2String[i] + ")"; - errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); - cerr << errStr << endl; - throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + if (cc) + { + valIn = cc->getStrVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - break; + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[i] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } } } // Skip if any value is NULL and respect nulls is off. diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index f7a4c4b08..fc3f9006d 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -53,8 +53,6 @@ public: // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF -// The template parameter is currently only used to support DISTINCT, as -// as that is done via a set template class WF_udaf : public WindowFunctionType { diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index 4c5b4de32..f5598a7e5 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -39,7 +39,6 @@ using namespace logging; using namespace ordering; #include "calpontsystemcatalog.h" -#include "constantcolumn.h" #include "dataconvert.h" // int64_t IDB_pow[19] using namespace execplan; @@ -228,6 +227,9 @@ WindowFunctionType::makeWindowFunction(const string& name, int ct, WindowFunctio break; } + // Copy the only the constant parameter pointers + af->constParms(wc->functionParms()); + return af; } @@ -634,6 +636,26 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) return v; } +void WindowFunctionType::constParms(const std::vector& functionParms) +{ + // fConstantParms will end up with a copy of functionParms, but only + // the constant types will be copied. 
Other types will take up space but + // be NULL. This allows us to acces the constants without the overhead + // of dynamic_cast for every row. + for (size_t i = 0; i < functionParms.size(); ++i) + { + ConstantColumn* cc = dynamic_cast(functionParms[i].get()); + if (cc) + { + fConstantParms.push_back(functionParms[i]); + } + else + { + fConstantParms.push_back(SRCP(cc)); + } + } +} + } //namespace // vim:ts=4 sw=4: diff --git a/utils/windowfunction/windowfunctiontype.h b/utils/windowfunction/windowfunctiontype.h index 50732d3b5..efa1c548a 100644 --- a/utils/windowfunction/windowfunctiontype.h +++ b/utils/windowfunction/windowfunctiontype.h @@ -31,7 +31,7 @@ #include "returnedcolumn.h" #include "rowgroup.h" #include "windowframe.h" - +#include "constantcolumn.h" namespace ordering { @@ -198,6 +198,8 @@ public: fStep = step; } + void constParms(const std::vector& functionParms); + static boost::shared_ptr makeWindowFunction(const std::string&, int ct, WindowFunctionColumn* wc); protected: @@ -244,6 +246,9 @@ protected: // output and input field indices: [0] - output std::vector fFieldIndex; + // constant function parameters -- needed for udaf with constant + std::vector fConstantParms; + // row meta data rowgroup::RowGroup fRowGroup; rowgroup::Row fRow; From b6424480c077411f8045b6374334fb196871d43b Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Tue, 29 May 2018 21:21:38 +0100 Subject: [PATCH 017/123] MCOL-1408 Fix HWM calculation for DML & API HWM for DML and API was being calculated using the first column in a table instead of the smallest column. This shifts the calculation to the correct column. 
--- writeengine/wrapper/we_colop.cpp | 8 +++---- writeengine/wrapper/writeengine.cpp | 36 ++++++++++++++--------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/writeengine/wrapper/we_colop.cpp b/writeengine/wrapper/we_colop.cpp index ac13a7bf0..fdaceef3e 100644 --- a/writeengine/wrapper/we_colop.cpp +++ b/writeengine/wrapper/we_colop.cpp @@ -208,7 +208,7 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, //Find out where the rest rows go BRM::LBID_t startLbid; //need to put in a loop until newExtent is true - newExtent = dbRootExtentTrackers[0]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); + newExtent = dbRootExtentTrackers[column.colNo]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); TableMetaData* tableMetaData= TableMetaData::makeTableMetaData(tableOid); while (!newExtent) { @@ -223,7 +223,7 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, for (i=0; i < dbRootExtentTrackers.size(); i++) { - if (i != 0) + if (i != column.colNo) dbRootExtentTrackers[i]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); // Round up HWM to the end of the current extent @@ -278,7 +278,7 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, } tableMetaData->setColExtsInfo(newColStructList[i].dataOid, aColExtsInfo); } - newExtent = dbRootExtentTrackers[0]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); + newExtent = dbRootExtentTrackers[column.colNo]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); } } @@ -297,7 +297,7 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, } rc = BRMWrapper::getInstance()->allocateStripeColExtents(cols, dbRoot, partition, segment, extents); - newHwm = extents[0].startBlkOffset; + newHwm = extents[column.colNo].startBlkOffset; if (rc != NO_ERROR) return rc; diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 50d846e7c..7cb3ca85e 100644 --- 
a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1505,6 +1505,19 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, for (i = 0; i < colStructList.size(); i++) Convertor::convertColType(&colStructList[i]); + // MCOL-984: find the smallest column width to calculate the RowID from so + // that all HWMs will be incremented by this operation + int32_t lowColLen = 8192; + int32_t colId = 0; + for (uint32_t colIt = 0; colIt < colStructList.size(); colIt++) + { + if (colStructList[colIt].colWidth < lowColLen) + { + colId = colIt; + lowColLen = colStructList[colId].colWidth; + } + } + // rc = checkValid(txnid, colStructList, colValueList, ridList); // if (rc != NO_ERROR) // return rc; @@ -1531,8 +1544,8 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, //-------------------------------------------------------------------------- if (isFirstBatchPm) { - currentDBrootIdx = dbRootExtentTrackers[0]->getCurrentDBRootIdx(); - extentInfo = dbRootExtentTrackers[0]->getDBRootExtentList(); + currentDBrootIdx = dbRootExtentTrackers[colId]->getCurrentDBRootIdx(); + extentInfo = dbRootExtentTrackers[colId]->getDBRootExtentList(); dbRoot = extentInfo[currentDBrootIdx].fDbRoot; partitionNum = extentInfo[currentDBrootIdx].fPartition; @@ -1698,7 +1711,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, } // if (isFirstBatchPm) else //get the extent info from tableMetaData { - ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[0].dataOid); + ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[colId].dataOid); ColExtsInfo::iterator it = aColExtsInfo.begin(); while (it != aColExtsInfo.end()) { @@ -1730,20 +1743,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, //-------------------------------------------------------------------------- // allocate row id(s) //-------------------------------------------------------------------------- - - // 
MCOL-984: find the smallest column width to calculate the RowID from so - // that all HWMs will be incremented by this operation - int32_t lowColLen = 8192; - int32_t colId = 0; - for (uint32_t colIt = 0; colIt < colStructList.size(); colIt++) - { - if (colStructList[colIt].colWidth < lowColLen) - { - colId = colIt; - lowColLen = colStructList[colId].colWidth; - curColStruct = colStructList[colId]; - } - } + curColStruct = colStructList[colId]; colOp = m_colOp[op(curColStruct.fCompressionType)]; colOp->initColumn(curCol); @@ -1765,7 +1765,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, if (it != aColExtsInfo.end()) { hwm = it->hwm; - //cout << "Got from colextinfo hwm for oid " << colStructList[0].dataOid << " is " << hwm << " and seg is " << colStructList[0].fColSegment << endl; + //cout << "Got from colextinfo hwm for oid " << colStructList[colId].dataOid << " is " << hwm << " and seg is " << colStructList[colId].fColSegment << endl; } oldHwm = hwm; //Save this info for rollback From 4fe399e5c0e17eadfd2b0adce07e95a5c439cc1d Mon Sep 17 00:00:00 2001 From: david hill Date: Wed, 30 May 2018 15:46:58 -0500 Subject: [PATCH 018/123] MCOL-1370 - auto-failure, dont switch ebs when detahc fails --- oam/oamcpp/liboamcpp.cpp | 79 ++++++++++++++++++++++++++++++++++++++++ oam/oamcpp/liboamcpp.h | 2 + procmgr/main.cpp | 20 +++++++++- 3 files changed, 100 insertions(+), 1 deletion(-) diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index 649d86f13..0536fbacb 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -5477,6 +5477,21 @@ namespace oam exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); } + //detach first to make sure DBS can be detach before trying to move to another pm + try + { + typedef std::vector dbrootList; + dbrootList dbrootlist; + dbrootlist.push_back(itoa(dbrootID)); + + amazonDetach(dbrootlist); + } + catch (exception& ) + { + writeLog("ERROR: amazonDetach failure", LOG_TYPE_ERROR ); + 
exceptionControl("autoMovePmDbroot", API_DETACH_FAILURE); + } + //get dbroot id for other PMs systemStorageInfo_t t; DeviceDBRootList moduledbrootlist; @@ -9644,6 +9659,69 @@ namespace oam } /*************************************************************************** + * + * Function: amazonDetach + * + * Purpose: Amazon EC2 volume deattach needed + * + ****************************************************************************/ + + void Oam::amazonDetach(dbrootList dbrootConfigList) + { + //if amazon cloud with external volumes, do the detach/attach moves + string cloud; + string DBRootStorageType; + try { + getSystemConfig("Cloud", cloud); + getSystemConfig("DBRootStorageType", DBRootStorageType); + } + catch(...) {} + + if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && + DBRootStorageType == "external" ) + { + writeLog("amazonDetach function started ", LOG_TYPE_DEBUG ); + + dbrootList::iterator pt3 = dbrootConfigList.begin(); + for( ; pt3 != dbrootConfigList.end() ; pt3++) + { + string dbrootid = *pt3; + string volumeNameID = "PMVolumeName" + dbrootid; + string volumeName = oam::UnassignedName; + string deviceNameID = "PMVolumeDeviceName" + dbrootid; + string deviceName = oam::UnassignedName; + try { + getSystemConfig( volumeNameID, volumeName); + getSystemConfig( deviceNameID, deviceName); + } + catch(...) 
+ {} + + if ( volumeName == oam::UnassignedName || deviceName == oam::UnassignedName ) + { + cout << " ERROR: amazonDetach, invalid configure " + volumeName + ":" + deviceName << endl; + writeLog("ERROR: amazonDetach, invalid configure " + volumeName + ":" + deviceName, LOG_TYPE_ERROR ); + exceptionControl("amazonDetach", API_INVALID_PARAMETER); + } + + //send msg to to-pm to umount volume + int returnStatus = sendMsgToProcMgr(UNMOUNT, dbrootid, FORCEFUL, ACK_YES); + if (returnStatus != API_SUCCESS) { + writeLog("ERROR: amazonDetach, umount failed on " + dbrootid, LOG_TYPE_ERROR ); + } + + if (!detachEC2Volume(volumeName)) { + cout << " ERROR: amazonDetach, detachEC2Volume failed on " + volumeName << endl; + writeLog("ERROR: amazonDetach, detachEC2Volume failed on " + volumeName , LOG_TYPE_ERROR ); + exceptionControl("amazonDetach", API_FAILURE); + } + + writeLog("amazonDetach, detachEC2Volume passed on " + volumeName , LOG_TYPE_DEBUG ); + } + } + } + + /*************************************************************************** * * Function: amazonReattach * @@ -9736,6 +9814,7 @@ namespace oam } } + /*************************************************************************** * * Function: mountDBRoot diff --git a/oam/oamcpp/liboamcpp.h b/oam/oamcpp/liboamcpp.h index 51c1f773c..fdfa7fe40 100644 --- a/oam/oamcpp/liboamcpp.h +++ b/oam/oamcpp/liboamcpp.h @@ -229,6 +229,7 @@ namespace oam API_CONN_REFUSED, API_CANCELLED, API_STILL_WORKING, + API_DETACH_FAILURE, API_MAX }; @@ -2432,6 +2433,7 @@ namespace oam void amazonReattach(std::string toPM, dbrootList dbrootConfigList, bool attach = false); void mountDBRoot(dbrootList dbrootConfigList, bool mount = true); + void amazonDetach(dbrootList dbrootConfigList); /** *@brief gluster control diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 118ac0d73..bf9ff8d67 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1922,7 +1922,7 @@ void pingDeviceThread() if ( PrimaryUMModuleName == moduleName ) downPrimaryUM = true; 
- // if not disabled and amazon, skip + // if disabled, skip if (opState != oam::AUTO_DISABLED ) { //Log failure, issue alarm, set moduleOpState @@ -1968,6 +1968,7 @@ void pingDeviceThread() if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) || ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) || ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) { + string error; try { log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG); oam.autoMovePmDbroot(moduleName); @@ -1984,6 +1985,23 @@ void pingDeviceThread() { log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR); } + + if ( error == OAM::API_DETACH_FAILURE ) + { + processManager.setModuleState(moduleName, oam::AUTO_DISABLED); + + // resume the dbrm + oam.dbrmctl("resume"); + log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); + + //enable query stats + dbrm.setSystemQueryReady(true); + + //set query system state ready + processManager.setQuerySystemState(true); + + break; + } } } From b2314f152d88534fd5f999350e9c60e106c7d408 Mon Sep 17 00:00:00 2001 From: david hill Date: Wed, 30 May 2018 16:16:42 -0500 Subject: [PATCH 019/123] MCOL-1370 --- oam/oamcpp/liboamcpp.cpp | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index 0536fbacb..b6fb8dc2d 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -5478,18 +5478,24 @@ namespace oam } //detach first to make sure DBS can be detach before trying to move to another pm - try + DBRootConfigList::iterator pt3 = residedbrootConfigList.begin(); + for( ; pt3 != residedbrootConfigList.end() ; ) { - typedef std::vector dbrootList; - dbrootList dbrootlist; - dbrootlist.push_back(itoa(dbrootID)); + int dbrootID = *pt3; - amazonDetach(dbrootlist); - } - catch (exception& ) - { - writeLog("ERROR: amazonDetach failure", LOG_TYPE_ERROR ); 
- exceptionControl("autoMovePmDbroot", API_DETACH_FAILURE); + try + { + typedef std::vector dbrootList; + dbrootList dbrootlist; + dbrootlist.push_back(itoa(dbrootID)); + + amazonDetach(dbrootlist); + } + catch (exception& ) + { + writeLog("ERROR: amazonDetach failure", LOG_TYPE_ERROR ); + exceptionControl("autoMovePmDbroot", API_DETACH_FAILURE); + } } //get dbroot id for other PMs From 5226833dd4579238fd8cdf0bf2f4461e8c072547 Mon Sep 17 00:00:00 2001 From: david hill Date: Wed, 30 May 2018 16:27:33 -0500 Subject: [PATCH 020/123] MCOL-1370 --- procmgr/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/procmgr/main.cpp b/procmgr/main.cpp index bf9ff8d67..49443de8d 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1986,7 +1986,7 @@ void pingDeviceThread() log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR); } - if ( error == OAM::API_DETACH_FAILURE ) + if ( error == oam.itoa(oam::API_DETACH_FAILURE) ) { processManager.setModuleState(moduleName, oam::AUTO_DISABLED); From efbf297eb75fb4b3e9d8edc2e195631744494be8 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Thu, 31 May 2018 10:45:22 +0300 Subject: [PATCH 021/123] MCOL-1384 Backport the MCOL-573 feature to 1.1. Change msg type to avoid server code assert violation. 
--- dbcon/ddlpackage/ddl.l | 61 ++-------------------------------- dbcon/ddlpackage/ddl.y | 18 ++-------- dbcon/mysql/ha_calpont_ddl.cpp | 4 +-- 3 files changed, 6 insertions(+), 77 deletions(-) diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index 34d80e902..6eeaafb0b 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -32,7 +32,7 @@ #endif using namespace ddlpackage; -typedef enum { NOOP, STRIP_QUOTES, STRIP_QUOTES_FQ } copy_action_t; +typedef enum { NOOP, STRIP_QUOTES } copy_action_t; int lineno = 1; void ddlerror(struct pass_to_bison* x, char const *s); @@ -72,8 +72,6 @@ identifier {ident_start}{ident_cont}* fq_identifier {identifier}\.{identifier} identifier_quoted {grave_accent}{identifier}{grave_accent} identifier_double_quoted {double_quote}{identifier}{double_quote} -fq_quoted ({identifier_quoted}|{identifier})\.({identifier_quoted}|{identifier}) -fq_double_quoted ({identifier_double_quoted}|{identifier})\.({identifier_double_quoted}|{identifier}) integer [-+]?{digit}+ decimal ([-+]?({digit}*\.{digit}+)|({digit}+\.{digit}*)) @@ -87,9 +85,6 @@ realfail2 ({integer}|{decimal})[Ee][-+] {identifier_quoted} { ddlget_lval(yyscanner)->str = scanner_copy( ddlget_text(yyscanner), yyscanner, STRIP_QUOTES ); return IDENT; } {identifier_double_quoted} { ddlget_lval(yyscanner)->str = scanner_copy( ddlget_text(yyscanner), yyscanner, STRIP_QUOTES ); return DQ_IDENT; } -{fq_identifier} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner); return FQ_IDENT; } -{fq_quoted} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner, STRIP_QUOTES_FQ); return FQ_IDENT; } -{fq_double_quoted} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner, STRIP_QUOTES_FQ); return FQ_IDENT; } ACTION {return ACTION;} ADD {return ADD;} @@ -195,7 +190,7 @@ LONGTEXT {return LONGTEXT;} } {grave_accent} { - /* ignore */ + return ddlget_text(yyscanner)[0]; } %% @@ -273,7 +268,6 @@ char* 
scanner_copy (char *str, yyscan_t yyscanner, copy_action_t action) char* result; char* nv = strdup(str); result = nv; - // free strduped memory later to prevent possible memory leak if(nv) ((scan_data*)ddlget_extra(yyscanner))->valbuf.push_back(nv); @@ -283,57 +277,6 @@ char* scanner_copy (char *str, yyscan_t yyscanner, copy_action_t action) nv[strlen(str) - 1] = '\0'; result = nv + 1; } - else if (action == STRIP_QUOTES_FQ) - { - bool move_left = false; - bool move_right = false; - char* left = nv; - char* tmp_first = nv; - // MCOL-1384 Loop through all comas in this quoted fq id - // looking for $quote_sign.$quote_sign sequence. - char* fq_delimiter; - int tmp_pos = 0; - while((fq_delimiter = strchr(tmp_first, '.')) != NULL) - { - if( (*(fq_delimiter -1) == '`' && *(fq_delimiter + 1) == '`') || - (*(fq_delimiter -1) == '"' && *(fq_delimiter + 1) == '"') ) - { - tmp_pos += fq_delimiter - tmp_first; - break; - } - tmp_first = fq_delimiter; - } - - char* fq_delimiter_orig = str + tmp_pos; - char* right = fq_delimiter + 1; - char* right_orig = fq_delimiter_orig + 1; - // MCOL-1384 Strip quotes from the left part. - if(*left == '"' || *left == '`') - { - result = left + 1; - *(fq_delimiter - 1) = '.'; - move_left = true; - } - else - { - fq_delimiter += 1; - } - - int right_length = strlen(right); - // MCOL-1384 Strip quotes from the right part. 
- if(*right == '`' || *right == '"') - { - right += 1; right_orig += 1; - right_length -= 2; - move_right = true; - *(fq_delimiter + right_length) = '\0'; - } - - if(move_left || move_right) - { - strncpy(fq_delimiter, right_orig, right_length); - } - } return result; } diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index cf5893773..96867cfb8 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -41,7 +41,6 @@ */ %{ -#include "string.h" #include "sqlparser.h" #ifdef _MSC_VER @@ -115,7 +114,7 @@ REFERENCES RENAME RESTRICT SET SMALLINT TABLE TEXT TIME TINYBLOB TINYTEXT TINYINT TO UNIQUE UNSIGNED UPDATE USER SESSION_USER SYSTEM_USER VARCHAR VARBINARY VARYING WITH ZONE DOUBLE IDB_FLOAT REAL CHARSET IDB_IF EXISTS CHANGE TRUNCATE -%token DQ_IDENT FQ_IDENT IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE +%token DQ_IDENT IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE /* Notes: * 1. "ata" stands for alter_table_action @@ -606,26 +605,13 @@ table_name: ; qualified_name: - FQ_IDENT { - char* delimeterPosition = strchr(const_cast($1), '.'); - if( delimeterPosition ) - { - *delimeterPosition = '\0'; - char* schemaName = const_cast($1); - char* tableName = delimeterPosition + 1; - $$ = new QualifiedName(schemaName, tableName); - *delimeterPosition = '.'; - } - else - $$ = new QualifiedName($1); - } | ident { if (x->fDBSchema.size()) $$ = new QualifiedName((char*)x->fDBSchema.c_str(), $1); else $$ = new QualifiedName($1); } - | IDENT '.' IDENT + | ident '.' 
ident { $$ = new QualifiedName($1, $3); } diff --git a/dbcon/mysql/ha_calpont_ddl.cpp b/dbcon/mysql/ha_calpont_ddl.cpp index 74b413667..9e757e426 100755 --- a/dbcon/mysql/ha_calpont_ddl.cpp +++ b/dbcon/mysql/ha_calpont_ddl.cpp @@ -2083,7 +2083,7 @@ int ha_calpont_impl_rename_table_(const char* from, const char* to, cal_connecti int rc = ProcessDDLStatement(stmt, db, "", tid2sid(thd->thread_id), emsg); if (rc != 0) - push_warning(thd, Sql_condition::WARN_LEVEL_ERROR, 9999, emsg.c_str()); + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 9999, emsg.c_str()); return rc; } @@ -2123,7 +2123,7 @@ long long calonlinealter(UDF_INIT* initid, UDF_ARGS* args, int rc = ProcessDDLStatement(stmt, db, "", tid2sid(thd->thread_id), emsg, compressiontype); if (rc != 0) - push_warning(thd, Sql_condition::WARN_LEVEL_ERROR, 9999, emsg.c_str()); + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 9999, emsg.c_str()); return rc; } From 1a1f3ea5060e2aef699f424ac4123a48f8464c47 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Wed, 23 May 2018 15:06:16 +0300 Subject: [PATCH 022/123] MCOL-1449. Fix the regression caused by changes in idborderby code for MCOL-1052. Add LIMIT and OFFSET support for group by handler. --- dbcon/joblist/limitedorderby.cpp | 15 +++++---------- dbcon/mysql/ha_calpont_execplan.cpp | 29 +++++++++++++++++++++++++++++ dbcon/mysql/ha_calpont_impl.cpp | 15 ++++++++++----- utils/windowfunction/idborderby.h | 3 ++- 4 files changed, 46 insertions(+), 16 deletions(-) diff --git a/dbcon/joblist/limitedorderby.cpp b/dbcon/joblist/limitedorderby.cpp index 82d6041a8..4f3366cc6 100644 --- a/dbcon/joblist/limitedorderby.cpp +++ b/dbcon/joblist/limitedorderby.cpp @@ -77,14 +77,9 @@ void LimitedOrderBy::initialize(const RowGroup& rg, const JobInfo& jobInfo) map::iterator j = keyToIndexMap.find(i->first); idbassert(j != keyToIndexMap.end()); - // MCOL-1052 Ordering direction in CSEP differs from - // internal direction representation. 
- if (i->second) - fOrderByCond.push_back(IdbSortSpec(j->second, false)); - else - fOrderByCond.push_back(IdbSortSpec(j->second, true)); - - //fOrderByCond.push_back(IdbSortSpec(j->second, i->second)); + // TODO Ordering direction in CSEP differs from + // internal direction representation. This behavior should be fixed + fOrderByCond.push_back(IdbSortSpec(j->second, i->second)); } // limit row count info @@ -182,8 +177,8 @@ void LimitedOrderBy::finalize() if (fRowGroup.getRowCount() > 0) fDataQueue.push(fData); - // MCOL-1052 The removed check effectivly disables sorting to happen, - // since fStart = 0; + // MCOL-1052 The removed check effectively disables sorting, + // since fStart = 0 if there is no OFFSET; if (true) { uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize(); diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index fac0cd032..14a7786a1 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -8009,6 +8009,10 @@ int cp_get_group_plan(THD* thd, SCSEP& csep, cal_impl_if::cal_group_info& gi) gwi.thd = thd; int status = getGroupPlan(gwi, select_lex, csep, gi); + cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; + cerr << *csep << endl ; + cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; + if (status > 0) return ER_INTERNAL_ERROR; else if (status < 0) @@ -9877,6 +9881,31 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro select_query += ord_cols; } } + // LIMIT and OFFSET are extracted from TABLE_LIST elements. + // All of JOIN-ed tables contain relevant limit and offset. 
+ if (gi.groupByTables->select_lex->select_limit) + { + csep->limitNum(((Item_int*)gi.groupByTables->select_lex->select_limit)->val_int()); + } + + if (gi.groupByTables->select_lex->offset_limit) + { + csep->limitStart(((Item_int*)gi.groupByTables->select_lex->offset_limit)->val_int()); + } + + //gwi.thd->infinidb_vtable.select_vtable_query.free(); + //gwi.thd->infinidb_vtable.select_vtable_query.append(select_query.c_str(), select_query.length()); + + // We don't currently support limit with correlated subquery + if (csep->limitNum() != (uint64_t) - 1 && + gwi.subQuery && !gwi.correlatedTbNameVec.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_LIMIT_SUB); + setError(gwi.thd, ER_INTERNAL_ERROR, gwi.parseErrorText, gwi); + return ER_CHECK_NOT_IMPLEMENTED; + } + } // ORDER BY processing ends here if ( gi.groupByDistinct ) diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 5ca94562b..49df3bde3 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -5581,14 +5581,19 @@ internal_error: */ /*********************************************************** * DESCRIPTION: - * Return a result record for each group_by_handler::next_row() call. + * Return a result record for each + * group_by_handler::next_row() call. * PARAMETERS: - * group_hand - group by handler, that preserves initial table and items lists. . - * table - TABLE pointer The table to save the result set in. + * group_hand - group by handler, that preserves initial + * table and items lists. . + * table - TABLE pointer The table to save the result + * set in. 
* RETURN: * 0 if success - * HA_ERR_END_OF_FILE if the record set has come to an end - * others if something went wrong whilst getting the result set + * HA_ERR_END_OF_FILE if the record set has come to + * an end + * others if something went wrong whilst getting the + * result set ***********************************************************/ int ha_calpont_impl_group_by_next(ha_calpont_group_by_handler* group_hand, TABLE* table) { diff --git a/utils/windowfunction/idborderby.h b/utils/windowfunction/idborderby.h index 91db95366..38b31deb9 100644 --- a/utils/windowfunction/idborderby.h +++ b/utils/windowfunction/idborderby.h @@ -59,7 +59,8 @@ class IdbCompare; struct IdbSortSpec { int fIndex; - int fAsc; // ::= ASC | DESC + // TODO There are three ordering specs since 10.2 + int fAsc; // ::= ASC | DESC int fNf; // ::= NULLS FIRST | NULLS LAST IdbSortSpec() : fIndex(-1), fAsc(1), fNf(1) {} From 1d8964ec0d19540f5030168e04b81882bf6613e4 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 31 May 2018 14:44:48 +0100 Subject: [PATCH 023/123] Fix code style --- dbcon/joblist/tupleunion.cpp | 2 +- dbcon/mysql/ha_calpont_ddl.cpp | 2 +- dbcon/mysql/ha_calpont_execplan.cpp | 7 ++++--- dbcon/mysql/ha_calpont_impl.cpp | 8 ++++---- utils/dataconvert/dataconvert.cpp | 1 + utils/funcexp/func_add_time.cpp | 1 + utils/windowfunction/idborderby.h | 2 +- writeengine/splitter/we_cmdargs.h | 5 ++++- writeengine/splitter/we_filereadthread.cpp | 10 ++++++---- writeengine/splitter/we_filereadthread.h | 2 +- writeengine/wrapper/we_colop.cpp | 9 +++++---- writeengine/wrapper/writeengine.cpp | 17 +++++++++-------- 12 files changed, 38 insertions(+), 28 deletions(-) diff --git a/dbcon/joblist/tupleunion.cpp b/dbcon/joblist/tupleunion.cpp index cf2d20373..d0892e45f 100644 --- a/dbcon/joblist/tupleunion.cpp +++ b/dbcon/joblist/tupleunion.cpp @@ -460,7 +460,7 @@ void TupleUnion::normalize(const Row& in, Row* out) if (in.getScale(i)) { double d = in.getIntField(i); - d /= 
exp10(in.getScale(i)); + d /= exp10(in.getScale(i)); os.precision(15); os << d; } diff --git a/dbcon/mysql/ha_calpont_ddl.cpp b/dbcon/mysql/ha_calpont_ddl.cpp index 8628f1238..4d5e0508d 100644 --- a/dbcon/mysql/ha_calpont_ddl.cpp +++ b/dbcon/mysql/ha_calpont_ddl.cpp @@ -2266,7 +2266,7 @@ extern "C" int rc = ProcessDDLStatement(stmt, db, "", tid2sid(thd->thread_id), emsg, compressiontype); if (rc != 0) - push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 9999, emsg.c_str()); + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 9999, emsg.c_str()); return rc; } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index ca7599e47..271508f42 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -9877,8 +9877,9 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro select_query += ord_cols; } } - // LIMIT and OFFSET are extracted from TABLE_LIST elements. - // All of JOIN-ed tables contain relevant limit and offset. + + // LIMIT and OFFSET are extracted from TABLE_LIST elements. + // All of JOIN-ed tables contain relevant limit and offset. if (gi.groupByTables->select_lex->select_limit) { csep->limitNum(((Item_int*)gi.groupByTables->select_lex->select_limit)->val_int()); @@ -9901,7 +9902,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro setError(gwi.thd, ER_INTERNAL_ERROR, gwi.parseErrorText, gwi); return ER_CHECK_NOT_IMPLEMENTED; } - + } // ORDER BY processing ends here if ( gi.groupByDistinct ) diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 49df3bde3..1ee343e90 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -5581,18 +5581,18 @@ internal_error: */ /*********************************************************** * DESCRIPTION: - * Return a result record for each + * Return a result record for each * group_by_handler::next_row() call. 
* PARAMETERS: * group_hand - group by handler, that preserves initial * table and items lists. . - * table - TABLE pointer The table to save the result + * table - TABLE pointer The table to save the result * set in. * RETURN: * 0 if success - * HA_ERR_END_OF_FILE if the record set has come to + * HA_ERR_END_OF_FILE if the record set has come to * an end - * others if something went wrong whilst getting the + * others if something went wrong whilst getting the * result set ***********************************************************/ int ha_calpont_impl_group_by_next(ha_calpont_group_by_handler* group_hand, TABLE* table) diff --git a/utils/dataconvert/dataconvert.cpp b/utils/dataconvert/dataconvert.cpp index 657a8d0b1..7b92f36b0 100644 --- a/utils/dataconvert/dataconvert.cpp +++ b/utils/dataconvert/dataconvert.cpp @@ -2769,6 +2769,7 @@ int64_t DataConvert::stringToTime(const string& data) if (*end != '\0') return -1; + hour = day * 24; day = -1; time = data.substr(pos + 1, data.length() - pos - 1); diff --git a/utils/funcexp/func_add_time.cpp b/utils/funcexp/func_add_time.cpp index 61eb9ec6c..c3d5d7d85 100644 --- a/utils/funcexp/func_add_time.cpp +++ b/utils/funcexp/func_add_time.cpp @@ -86,6 +86,7 @@ int64_t addTime(DateTime& dt1, Time& dt2) dt2.day = hour / 24; hour = hour % 24; } + if (hour < 0) { dt.hour = hour + 24; diff --git a/utils/windowfunction/idborderby.h b/utils/windowfunction/idborderby.h index 38b31deb9..a432fdc31 100644 --- a/utils/windowfunction/idborderby.h +++ b/utils/windowfunction/idborderby.h @@ -60,7 +60,7 @@ struct IdbSortSpec { int fIndex; // TODO There are three ordering specs since 10.2 - int fAsc; // ::= ASC | DESC + int fAsc; // ::= ASC | DESC int fNf; // ::= NULLS FIRST | NULLS LAST IdbSortSpec() : fIndex(-1), fAsc(1), fNf(1) {} diff --git a/writeengine/splitter/we_cmdargs.h b/writeengine/splitter/we_cmdargs.h index db5c0c0ac..edaa4cdd1 100644 --- a/writeengine/splitter/we_cmdargs.h +++ b/writeengine/splitter/we_cmdargs.h @@ -77,7 
+77,10 @@ public: { return fLocFile; } - int getReadBufSize() { return fReadBufSize; } + int getReadBufSize() + { + return fReadBufSize; + } { return fMode; } diff --git a/writeengine/splitter/we_filereadthread.cpp b/writeengine/splitter/we_filereadthread.cpp index fb80189bb..6840d377d 100644 --- a/writeengine/splitter/we_filereadthread.cpp +++ b/writeengine/splitter/we_filereadthread.cpp @@ -87,6 +87,7 @@ WEFileReadThread::WEFileReadThread(WESDHandler& aSdh): fSdh(aSdh), { //TODO batch qty to get from config fBatchQty = 10000; + if (fSdh.getReadBufSize() < DEFAULTBUFFSIZE) { fBuffSize = DEFAULTBUFFSIZE; @@ -95,6 +96,7 @@ WEFileReadThread::WEFileReadThread(WESDHandler& aSdh): fSdh(aSdh), { fBuffSize = fSdh.getReadBufSize(); } + fBuff = new char [fBuffSize]; } @@ -362,17 +364,17 @@ unsigned int WEFileReadThread::readDataFile(messageqcpp::SBS& Sbs) if (fEnclEsc) { //pStart = aBuff; - aLen = getNextRow(fInFile, fBuff, fBuffSize-1); + aLen = getNextRow(fInFile, fBuff, fBuffSize - 1); } else { - fInFile.getline(fBuff, fBuffSize-1); + fInFile.getline(fBuff, fBuffSize - 1); aLen = fInFile.gcount(); } ////aLen chars incl \n, Therefore aLen-1; '<<' oper won't go past it //cout << "Data Length " << aLen <0)) + if ((aLen < (fBuffSize - 2)) && (aLen > 0)) { fBuff[aLen - 1] = '\n'; fBuff[aLen] = 0; @@ -384,7 +386,7 @@ unsigned int WEFileReadThread::readDataFile(messageqcpp::SBS& Sbs) if (fSdh.getDebugLvl() > 2) cout << "File data line = " << aIdx << endl; } - else if(aLen>=fBuffSize-2) //Didn't hit delim; BIG ROW + else if (aLen >= fBuffSize - 2) //Didn't hit delim; BIG ROW { cout << "Bad Row data " << endl; cout << fBuff << endl; diff --git a/writeengine/splitter/we_filereadthread.h b/writeengine/splitter/we_filereadthread.h index eaa65037a..2fb92332c 100644 --- a/writeengine/splitter/we_filereadthread.h +++ b/writeengine/splitter/we_filereadthread.h @@ -126,7 +126,7 @@ public: void add2InputDataFileList(std::string& FileName); private: - enum { DEFAULTBUFFSIZE=1024*1024 }; + 
enum { DEFAULTBUFFSIZE = 1024 * 1024 }; // don't allow anyone else to set void setTgtPmId(unsigned int fTgtPmId) diff --git a/writeengine/wrapper/we_colop.cpp b/writeengine/wrapper/we_colop.cpp index df2080af6..ffd01df2e 100644 --- a/writeengine/wrapper/we_colop.cpp +++ b/writeengine/wrapper/we_colop.cpp @@ -222,7 +222,7 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, //Find out where the rest rows go BRM::LBID_t startLbid; //need to put in a loop until newExtent is true - newExtent = dbRootExtentTrackers[column.colNo]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); + newExtent = dbRootExtentTrackers[column.colNo]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); TableMetaData* tableMetaData = TableMetaData::makeTableMetaData(tableOid); while (!newExtent) @@ -238,7 +238,7 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, for (i = 0; i < dbRootExtentTrackers.size(); i++) { - if (i != column.colNo) + if (i != column.colNo) dbRootExtentTrackers[i]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); // Round up HWM to the end of the current extent @@ -302,7 +302,8 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, tableMetaData->setColExtsInfo(newColStructList[i].dataOid, aColExtsInfo); } - newExtent = dbRootExtentTrackers[column.colNo]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); + + newExtent = dbRootExtentTrackers[column.colNo]->nextSegFile(dbRoot, partition, segment, newHwm, startLbid); } } @@ -323,7 +324,7 @@ int ColumnOp::allocRowId(const TxnID& txnid, bool useStartingExtent, } rc = BRMWrapper::getInstance()->allocateStripeColExtents(cols, dbRoot, partition, segment, extents); - newHwm = extents[column.colNo].startBlkOffset; + newHwm = extents[column.colNo].startBlkOffset; if (rc != NO_ERROR) return rc; diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 19f66fcbf..7cd275021 100644 --- 
a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1683,18 +1683,19 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, for (i = 0; i < colStructList.size(); i++) Convertor::convertColType(&colStructList[i]); - // MCOL-984: find the smallest column width to calculate the RowID from so - // that all HWMs will be incremented by this operation - int32_t lowColLen = 8192; - int32_t colId = 0; - for (uint32_t colIt = 0; colIt < colStructList.size(); colIt++) - { + // MCOL-984: find the smallest column width to calculate the RowID from so + // that all HWMs will be incremented by this operation + int32_t lowColLen = 8192; + int32_t colId = 0; + + for (uint32_t colIt = 0; colIt < colStructList.size(); colIt++) + { if (colStructList[colIt].colWidth < lowColLen) { colId = colIt; lowColLen = colStructList[colId].colWidth; } - } + } // rc = checkValid(txnid, colStructList, colValueList, ridList); // if (rc != NO_ERROR) @@ -1944,7 +1945,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, // allocate row id(s) //-------------------------------------------------------------------------- - curColStruct = colStructList[colId]; + curColStruct = colStructList[colId]; colOp = m_colOp[op(curColStruct.fCompressionType)]; From 4764094e4a66b4517cde88bd7a52debf5ad80cd1 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 31 May 2018 15:04:04 +0100 Subject: [PATCH 024/123] Fix merge errors --- dbcon/ddlpackage/ddl.y | 3 ++- writeengine/splitter/we_cmdargs.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index b143895b9..e45df1289 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -198,6 +198,7 @@ VARYING WITH ZONE DOUBLE IDB_FLOAT REAL CHARSET IDB_IF EXISTS CHANGE TRUNCATE %type opt_if_not_exists %type trunc_table_statement %type rename_table_statement +%type ident %% stmtblock: stmtmulti { x->fParseTree = $1; } @@ -615,7 +616,7 
@@ table_name: ; qualified_name: - | ident { + ident { if (x->fDBSchema.size()) $$ = new QualifiedName((char*)x->fDBSchema.c_str(), $1); else diff --git a/writeengine/splitter/we_cmdargs.h b/writeengine/splitter/we_cmdargs.h index edaa4cdd1..3186fe6c0 100644 --- a/writeengine/splitter/we_cmdargs.h +++ b/writeengine/splitter/we_cmdargs.h @@ -81,6 +81,7 @@ public: { return fReadBufSize; } + int getMode() { return fMode; } From ed8e774dcd54e471a2858da8734fa6013d307565 Mon Sep 17 00:00:00 2001 From: david hill Date: Fri, 1 Jun 2018 16:33:48 -0500 Subject: [PATCH 025/123] MCOL-1370 --- oam/cloud/MCSVolumeCmds.sh | 4 +- oam/oamcpp/liboamcpp.cpp | 94 ++++++++++++++++++++++++++++++++++++-- oam/oamcpp/liboamcpp.h | 1 + procmgr/main.cpp | 4 +- procmgr/processmanager.cpp | 15 +++--- procmgr/processmanager.h | 2 +- procmon/main.cpp | 6 +-- procmon/processmonitor.cpp | 16 +++---- 8 files changed, 114 insertions(+), 28 deletions(-) diff --git a/oam/cloud/MCSVolumeCmds.sh b/oam/cloud/MCSVolumeCmds.sh index 291d27e44..c7a231261 100755 --- a/oam/cloud/MCSVolumeCmds.sh +++ b/oam/cloud/MCSVolumeCmds.sh @@ -202,7 +202,7 @@ detachvolume() { checkInfostatus if [ $STATUS == "detaching" ]; then retries=1 - while [ $retries -ne 60 ]; do + while [ $retries -ne 10 ]; do #retry until it's attached $AWSCLI detach-volume --volume-id $volumeName --region $Region > /tmp/volumeInfo_$volumeName 2>&1 @@ -239,7 +239,7 @@ attachvolume() { checkInfostatus if [ $STATUS == "attaching" -o $STATUS == "already-attached" ]; then retries=1 - while [ $retries -ne 60 ]; do + while [ $retries -ne 10 ]; do #check status until it's attached describevolume if [ $STATUS == "attached" ]; then diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index b6fb8dc2d..9a405e978 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -5479,7 +5479,7 @@ namespace oam //detach first to make sure DBS can be detach before trying to move to another pm DBRootConfigList::iterator pt3 = 
residedbrootConfigList.begin(); - for( ; pt3 != residedbrootConfigList.end() ; ) + for( ; pt3 != residedbrootConfigList.end() ; pt3++ ) { int dbrootID = *pt3; @@ -5494,6 +5494,14 @@ namespace oam catch (exception& ) { writeLog("ERROR: amazonDetach failure", LOG_TYPE_ERROR ); + + //reattach + typedef std::vector dbrootList; + dbrootList dbrootlist; + dbrootlist.push_back(itoa(dbrootID)); + + amazonAttach(residePM, dbrootlist); + exceptionControl("autoMovePmDbroot", API_DETACH_FAILURE); } } @@ -5972,9 +5980,8 @@ namespace oam } if (!found) { - writeLog("ERROR: no dbroots found in ../Calpont/local/moveDbrootTransactionLog", LOG_TYPE_ERROR ); - cout << "ERROR: no dbroots found in " << fileName << endl; - exceptionControl("autoUnMovePmDbroot", API_FAILURE); + writeLog("No dbroots found in ../Calpont/local/moveDbrootTransactionLog", LOG_TYPE_DEBUG ); + cout << "No dbroots found in " << fileName << endl; } oldFile.close(); @@ -7269,7 +7276,7 @@ namespace oam else return; - // check if mysql-Capont is installed + // check if mysql-Columnstore is installed string mysqlscript = InstallDir + "/mysql/mysql-Columnstore"; if (access(mysqlscript.c_str(), X_OK) != 0) return; @@ -9727,6 +9734,83 @@ namespace oam } } + /*************************************************************************** + * + * Function: amazonAttach + * + * Purpose: Amazon EC2 volume Attach needed + * + ****************************************************************************/ + + void Oam::amazonAttach(std::string toPM, dbrootList dbrootConfigList) + { + //if amazon cloud with external volumes, do the detach/attach moves + string cloud; + string DBRootStorageType; + try { + getSystemConfig("Cloud", cloud); + getSystemConfig("DBRootStorageType", DBRootStorageType); + } + catch(...) 
{} + + if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && + DBRootStorageType == "external" ) + { + writeLog("amazonAttach function started ", LOG_TYPE_DEBUG ); + + //get Instance Name for to-pm + string toInstanceName = oam::UnassignedName; + try + { + ModuleConfig moduleconfig; + getSystemConfig(toPM, moduleconfig); + HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); + toInstanceName = (*pt1).HostName; + } + catch(...) + {} + + if ( toInstanceName == oam::UnassignedName || toInstanceName.empty() ) + { + cout << " ERROR: amazonAttach, invalid Instance Name for " << toPM << endl; + writeLog("ERROR: amazonAttach, invalid Instance Name " + toPM, LOG_TYPE_ERROR ); + exceptionControl("amazonAttach", API_INVALID_PARAMETER); + } + + dbrootList::iterator pt3 = dbrootConfigList.begin(); + for( ; pt3 != dbrootConfigList.end() ; pt3++) + { + string dbrootid = *pt3; + string volumeNameID = "PMVolumeName" + dbrootid; + string volumeName = oam::UnassignedName; + string deviceNameID = "PMVolumeDeviceName" + dbrootid; + string deviceName = oam::UnassignedName; + try { + getSystemConfig( volumeNameID, volumeName); + getSystemConfig( deviceNameID, deviceName); + } + catch(...) 
+ {} + + if ( volumeName == oam::UnassignedName || deviceName == oam::UnassignedName ) + { + cout << " ERROR: amazonAttach, invalid configure " + volumeName + ":" + deviceName << endl; + writeLog("ERROR: amazonAttach, invalid configure " + volumeName + ":" + deviceName, LOG_TYPE_ERROR ); + exceptionControl("amazonAttach", API_INVALID_PARAMETER); + } + + if (!attachEC2Volume(volumeName, deviceName, toInstanceName)) { + cout << " ERROR: amazonAttach, attachEC2Volume failed on " + volumeName + ":" + deviceName + ":" + toInstanceName << endl; + writeLog("ERROR: amazonAttach, attachEC2Volume failed on " + volumeName + ":" + deviceName + ":" + toInstanceName, LOG_TYPE_ERROR ); + exceptionControl("amazonAttach", API_FAILURE); + } + + writeLog("amazonAttach, attachEC2Volume passed on " + volumeName + ":" + toPM, LOG_TYPE_DEBUG ); + } + } + } + + /*************************************************************************** * * Function: amazonReattach diff --git a/oam/oamcpp/liboamcpp.h b/oam/oamcpp/liboamcpp.h index fdfa7fe40..e5011407c 100644 --- a/oam/oamcpp/liboamcpp.h +++ b/oam/oamcpp/liboamcpp.h @@ -2434,6 +2434,7 @@ namespace oam void amazonReattach(std::string toPM, dbrootList dbrootConfigList, bool attach = false); void mountDBRoot(dbrootList dbrootConfigList, bool mount = true); void amazonDetach(dbrootList dbrootConfigList); + void amazonAttach(std::string toPM, dbrootList dbrootConfigList); /** *@brief gluster control diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 49443de8d..2747fda16 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1553,7 +1553,7 @@ void pingDeviceThread() processManager.restartProcessType("WriteEngineServer", moduleName); //set module to enable state - processManager.enableModule(moduleName, oam::AUTO_OFFLINE); + processManager.enableModule(moduleName, oam::AUTO_OFFLINE, true); downActiveOAMModule = false; int retry; @@ -1647,7 +1647,7 @@ void pingDeviceThread() } else //set module to enable state - 
processManager.enableModule(moduleName, oam::AUTO_OFFLINE); + processManager.enableModule(moduleName, oam::AUTO_OFFLINE, true); //restart module processes int retry = 0; diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 36893e050..8b01179d2 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -3438,7 +3438,7 @@ void ProcessManager::recycleProcess(string module, bool enableModule) restartProcessType("ExeMgr"); sleep(1); - restartProcessType("mysql"); + restartProcessType("mysqld"); restartProcessType("WriteEngineServer"); sleep(1); @@ -3457,7 +3457,7 @@ void ProcessManager::recycleProcess(string module, bool enableModule) * purpose: Clear the Disable State on a specified module * ******************************************************************************************/ -int ProcessManager::enableModule(string target, int state) +int ProcessManager::enableModule(string target, int state, bool failover) { Oam oam; ModuleConfig moduleconfig; @@ -3496,7 +3496,8 @@ int ProcessManager::enableModule(string target, int state) setStandbyModule(newStandbyModule); //set recycle process - recycleProcess(target); + if (!failover) + recycleProcess(target); log.writeLog(__LINE__, "enableModule request for " + target + " completed", LOG_TYPE_DEBUG); @@ -4256,7 +4257,7 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski PMwithUM = "n"; } - // If mysql is the processName, then send to modules were ExeMgr is running + // If mysqld is the processName, then send to modules were ExeMgr is running try { oam.getProcessStatus(systemprocessstatus); @@ -4267,7 +4268,7 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski if ( systemprocessstatus.processstatus[i].Module == skipModule ) continue; - if ( processName == "mysql" ) { + if ( processName == "mysqld" ) { if ( systemprocessstatus.processstatus[i].ProcessName == "ExeMgr") { ProcessStatus procstat; oam.getProcessStatus("mysqld", 
systemprocessstatus.processstatus[i].Module, procstat); @@ -8985,7 +8986,7 @@ int ProcessManager::OAMParentModuleChange() if (systemstatus.SystemOpState == ACTIVE) { log.writeLog(__LINE__, "System Active, restart needed processes", LOG_TYPE_DEBUG); - processManager.restartProcessType("mysql"); + processManager.restartProcessType("mysqld"); processManager.restartProcessType("ExeMgr"); processManager.restartProcessType("WriteEngineServer"); processManager.reinitProcessType("DBRMWorkerNode"); @@ -10099,7 +10100,7 @@ void ProcessManager::stopProcessTypes(bool manualFlag) log.writeLog(__LINE__, "stopProcessTypes Called"); //front-end first - processManager.stopProcessType("mysql", manualFlag); + processManager.stopProcessType("mysqld", manualFlag); processManager.stopProcessType("DMLProc", manualFlag); processManager.stopProcessType("DDLProc", manualFlag); processManager.stopProcessType("ExeMgr", manualFlag); diff --git a/procmgr/processmanager.h b/procmgr/processmanager.h index 55dad53cb..863ad9121 100644 --- a/procmgr/processmanager.h +++ b/procmgr/processmanager.h @@ -307,7 +307,7 @@ public: /** *@brief Enable a specified module */ - int enableModule(std::string target, int state); + int enableModule(std::string target, int state, bool failover = false); /** *@brief Enable a specified module diff --git a/procmon/main.cpp b/procmon/main.cpp index b010b3d74..2f98bc1e7 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -695,8 +695,8 @@ int main(int argc, char **argv) if ( ret != 0 ) log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR); - //mysql status monitor thread - if ( ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM ) || + //mysqld status monitor thread + if ( ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) || (PMwithUM == "y") ) { @@ -1127,7 +1127,7 @@ static void mysqlMonitorThread(MonitorConfig config) catch(...) 
{} - sleep(10); + sleep(5); } } diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index aa10f2666..91f78e640 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -457,7 +457,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO log.writeLog(__LINE__, "MSG RECEIVED: Stop process request on " + processName); int requestStatus = API_SUCCESS; - // check for mysql + // check for mysqld if ( processName == "mysqld" ) { try { oam.actionMysqlCalpont(MYSQL_STOP); @@ -520,7 +520,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO msg >> manualFlag; log.writeLog(__LINE__, "MSG RECEIVED: Start process request on: " + processName); - // check for mysql + // check for mysqld if ( processName == "mysqld" ) { try { oam.actionMysqlCalpont(MYSQL_START); @@ -640,7 +640,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO log.writeLog(__LINE__, "MSG RECEIVED: Restart process request on " + processName); int requestStatus = API_SUCCESS; - // check for mysql restart + // check for mysqld restart if ( processName == "mysqld" ) { try { oam.actionMysqlCalpont(MYSQL_RESTART); @@ -869,7 +869,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO log.writeLog(__LINE__, "Error running DBRM clearShm", LOG_TYPE_ERROR); } - //stop the mysql daemon + //stop the mysqld daemon try { oam.actionMysqlCalpont(MYSQL_STOP); log.writeLog(__LINE__, "Stop MySQL Process", LOG_TYPE_DEBUG); @@ -995,12 +995,12 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO system(cmd.c_str()); - //start the mysql daemon + //start the mysqld daemon try { oam.actionMysqlCalpont(MYSQL_START); } catch(...) 
- { // mysql didn't start, return with error + { // mysqld didn't start, return with error log.writeLog(__LINE__, "STARTALL: MySQL failed to start, start-module failure", LOG_TYPE_CRITICAL); ackMsg << (ByteStream::byte) ACK; @@ -1265,7 +1265,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO //send down notification oam.sendDeviceNotification(config.moduleName(), MODULE_DOWN); - //stop the mysql daemon and then columnstore + //stop the mysqld daemon and then columnstore try { oam.actionMysqlCalpont(MYSQL_STOP); } @@ -1444,7 +1444,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO } } - // install mysql rpms if being reconfigured as a um + // install mysqld rpms if being reconfigured as a um if ( reconfigureModuleName.find("um") != string::npos ) { string cmd = startup::StartUp::installDir() + "/bin/post-mysqld-install >> /tmp/rpminstall"; system(cmd.c_str()); From f850f8b0d018d5c2b681067ba52d671b24d72b37 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Mon, 4 Jun 2018 15:17:56 +0100 Subject: [PATCH 026/123] MCOL-807 HOUR() is always positive It is undocumented but when MariaDB gets a negative time in the HOUR() function it always returns a positive value. This does the same for us. 
--- utils/funcexp/func_hour.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/utils/funcexp/func_hour.cpp b/utils/funcexp/func_hour.cpp index 685a264db..4750829ad 100644 --- a/utils/funcexp/func_hour.cpp +++ b/utils/funcexp/func_hour.cpp @@ -127,22 +127,13 @@ int64_t Func_hour::getIntVal(rowgroup::Row& row, if (isTime) { - // If negative, mask so it doesn't turn positive - bool isNeg = false; + // HOUR() is always positive in MariaDB, even for negative time int64_t mask = 0; if ((val >> 40) & 0x800) mask = 0xfffffffffffff000; - if (!mask && (val >> 63)) - { - isNeg = true; - } - - val = mask | ((val >> 40) & 0xfff); - - if (isNeg) - val *= -1; + val = abs(mask | ((val >> 40) & 0xfff)); } else { From 6948ab85a3308f9e4b29fd4aa739eed4231bfdc5 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Mon, 4 Jun 2018 19:57:42 +0100 Subject: [PATCH 027/123] MCOL-1427 Fix microsecond padding for display We were padding from right, we needed to pad from left. --- dbcon/mysql/ha_calpont_impl.cpp | 4 ++-- utils/dataconvert/dataconvert.cpp | 19 ++++--------------- utils/dataconvert/dataconvert.h | 19 +++---------------- 3 files changed, 9 insertions(+), 33 deletions(-) diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 1ee343e90..21649c2d0 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -582,7 +582,7 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h *(*f)->null_ptr &= ~(*f)->null_bit; intColVal = row.getUintField<8>(s); - DataConvert::datetimeToString(intColVal, tmp, 255); + DataConvert::datetimeToString(intColVal, tmp, 255, colType.precision); /* setting the field_length is a sort-of hack. The length * at this point can be long enough to include mseconds. 
@@ -606,7 +606,7 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h *(*f)->null_ptr &= ~(*f)->null_bit; intColVal = row.getUintField<8>(s); - DataConvert::timeToString(intColVal, tmp, 255); + DataConvert::timeToString(intColVal, tmp, 255, colType.precision); Field_varstring* f2 = (Field_varstring*)*f; f2->store(tmp, strlen(tmp), f2->charset()); diff --git a/utils/dataconvert/dataconvert.cpp b/utils/dataconvert/dataconvert.cpp index 7b92f36b0..398622631 100644 --- a/utils/dataconvert/dataconvert.cpp +++ b/utils/dataconvert/dataconvert.cpp @@ -2082,13 +2082,8 @@ std::string DataConvert::datetimeToString( long long datetimevalue, long decima if (dt.msecond && decimals) { - snprintf(buf + strlen(buf), 21 + decimals, ".%d", dt.msecond); - - // Pad end with zeros - if (strlen(buf) < (size_t)(21 + decimals)) - { - sprintf(buf + strlen(buf), "%0*d", (int)(21 + decimals - strlen(buf)), 0); - } + // Pad start with zeros + sprintf(buf + strlen(buf), ".%0*d", (int)decimals, dt.msecond); } return buf; @@ -2118,14 +2113,8 @@ std::string DataConvert::timeToString( long long timevalue, long decimals ) if (dt.msecond && decimals) { - size_t start = strlen(buf); - snprintf(buf + strlen(buf), 12 + decimals, ".%d", dt.msecond); - - // Pad end with zeros - if (strlen(buf) - start < (size_t)decimals) - { - sprintf(buf + strlen(buf), "%0*d", (int)(decimals - (strlen(buf) - start) + 1), 0); - } + // Pad start with zeros + sprintf(buf + strlen(buf), ".%0*d", (int)decimals, dt.msecond); } return buf; diff --git a/utils/dataconvert/dataconvert.h b/utils/dataconvert/dataconvert.h index 0cf3480c5..c01f261b6 100644 --- a/utils/dataconvert/dataconvert.h +++ b/utils/dataconvert/dataconvert.h @@ -587,14 +587,7 @@ inline void DataConvert::datetimeToString( long long datetimevalue, char* buf, u if (msec || decimals) { - size_t start = strlen(buf); - snprintf(buf + strlen(buf), buflen - start, ".%d", msec); - - // Pad end with zeros - if (strlen(buf) - start < 
(size_t)decimals) - { - snprintf(buf + strlen(buf), buflen - strlen(buf), "%0*d", (int)(decimals - (strlen(buf) - start) + 1), 0); - } + snprintf(buf + strlen(buf), buflen - strlen(buf), ".%0*d", (int)decimals, msec); } } @@ -636,14 +629,8 @@ inline void DataConvert::timeToString( long long timevalue, char* buf, unsigned if (msec || decimals) { - size_t start = strlen(buf); - snprintf(buf + strlen(buf), buflen - start, ".%d", msec); - - // Pad end with zeros - if (strlen(buf) - start < (size_t)decimals) - { - snprintf(buf + strlen(buf), buflen - strlen(buf), "%0*d", (int)(decimals - (strlen(buf) - start) + 1), 0); - } + // Pad start with zeros + snprintf(buf + strlen(buf), buflen - strlen(buf), ".%0*d", (int)decimals, msec); } } From fd6a2f46a51486eae4990b516b8300002522114f Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Mon, 4 Jun 2018 20:47:33 +0100 Subject: [PATCH 028/123] MCOL-1429 Fix DAYNAME()/MONTHNAME() NULL result For NULL result -1 cast to a uint was used as an array index. This caused crashes with TIME data type. 
--- utils/funcexp/func_dayname.cpp | 4 +++- utils/funcexp/func_monthname.cpp | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/utils/funcexp/func_dayname.cpp b/utils/funcexp/func_dayname.cpp index 3825bc1a3..325fb2f4a 100644 --- a/utils/funcexp/func_dayname.cpp +++ b/utils/funcexp/func_dayname.cpp @@ -145,7 +145,9 @@ string Func_dayname::getStrVal(rowgroup::Row& row, bool& isNull, CalpontSystemCatalog::ColType& op_ct) { - uint32_t weekday = getIntVal(row, parm, isNull, op_ct); + int32_t weekday = getIntVal(row, parm, isNull, op_ct); + if (weekday == -1) + return ""; return helpers::weekdayFullNames[weekday]; } diff --git a/utils/funcexp/func_monthname.cpp b/utils/funcexp/func_monthname.cpp index dbe5aa513..8d8200775 100644 --- a/utils/funcexp/func_monthname.cpp +++ b/utils/funcexp/func_monthname.cpp @@ -47,7 +47,9 @@ string Func_monthname::getStrVal(rowgroup::Row& row, bool& isNull, CalpontSystemCatalog::ColType& op_ct) { - uint32_t month = getIntVal(row, parm, isNull, op_ct); + int32_t month = getIntVal(row, parm, isNull, op_ct); + if (month == -1) + return ""; return helpers::monthFullNames[month]; } From 92cb6345fb16cff3db76fa373a3381c630d4e27f Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Mon, 4 Jun 2018 22:03:50 +0100 Subject: [PATCH 029/123] MCOL-1428 Fix SUBTIME() with day in WHERE SUBTIME() with day number used the DATETIME funciton instead of TIME so got stuck in an endless loop. Now uses the TIME calculations instead. 
--- utils/funcexp/func_add_time.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/utils/funcexp/func_add_time.cpp b/utils/funcexp/func_add_time.cpp index c3d5d7d85..edd05c6d4 100644 --- a/utils/funcexp/func_add_time.cpp +++ b/utils/funcexp/func_add_time.cpp @@ -223,7 +223,14 @@ int64_t Func_add_time::getIntVal(rowgroup::Row& row, bool& isNull, CalpontSystemCatalog::ColType& op_ct) { - return getDatetimeIntVal(row, parm, isNull, op_ct); + if (parm[0]->data()->resultType().colDataType == execplan::CalpontSystemCatalog::TIME) + { + return getTimeIntVal(row, parm, isNull, op_ct); + } + else + { + return getDatetimeIntVal(row, parm, isNull, op_ct); + } } string Func_add_time::getStrVal(rowgroup::Row& row, From d7562aa0ae718f1b53a332deecfee8edd59a7e3e Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Tue, 5 Jun 2018 12:54:37 +0100 Subject: [PATCH 030/123] MCOL-1419 Fix TIME update saturation values We should saturate at the maximum and minimum values for TIME. --- utils/dataconvert/dataconvert.cpp | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/utils/dataconvert/dataconvert.cpp b/utils/dataconvert/dataconvert.cpp index 398622631..6c66a07de 100644 --- a/utils/dataconvert/dataconvert.cpp +++ b/utils/dataconvert/dataconvert.cpp @@ -859,7 +859,7 @@ bool mysql_str_to_datetime( const string& input, DateTime& output, bool& isDate return true; } -bool mysql_str_to_time( const string& input, Time& output ) +bool mysql_str_to_time( const string& input, Time& output, long decimals ) { int32_t datesepct = 0; uint32_t dtend = 0; @@ -999,20 +999,21 @@ bool mysql_str_to_time( const string& input, Time& output ) if ( !isTimeValid( hour, min, sec, usec ) ) { // Emulate MariaDB's time saturation - if (hour > 838) + // TODO: msec saturation + if ((hour > 838) && !isNeg) { output.hour = 838; output.minute = 59; output.second = 59; - output.msecond = 999999; + output.msecond = exp10(decimals) - 1; output.is_neg = 0; 
} - else if (hour < -838) + else if ((hour < -838) || ((hour > 838) && isNeg)) { output.hour = -838; output.minute = 59; output.second = 59; - output.msecond = 999999; + output.msecond = exp10(decimals) - 1; output.is_neg = 1; } // If neither of the above match then we return a 0 time @@ -1068,9 +1069,9 @@ bool stringToDatetimeStruct(const string& data, DateTime& dtime, bool* date) return true; } -bool stringToTimeStruct(const string& data, Time& dtime) +bool stringToTimeStruct(const string& data, Time& dtime, long decimals) { - if ( !mysql_str_to_time( data, dtime ) ) + if ( !mysql_str_to_time( data, dtime, decimals ) ) return false; return true; @@ -1415,15 +1416,11 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType, { Time aTime; - if (stringToTimeStruct(data, aTime)) + if (!stringToTimeStruct(data, aTime, colType.precision)) { - value = (int64_t) * (reinterpret_cast(&aTime)); - } - else - { - value = (int64_t) 0; pushWarning = true; } + value = (int64_t) * (reinterpret_cast(&aTime)); } break; From fb5f3240101abbdf8a1cbd2dc8fddc20004a5789 Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 5 Jun 2018 12:53:45 -0500 Subject: [PATCH 031/123] MCOL-1201 Fix DISTINCT with UDAF multi-parm --- dbcon/joblist/tupleaggregatestep.cpp | 134 ++++++++++++++++++--------- utils/rowgroup/rowaggregation.cpp | 6 +- 2 files changed, 95 insertions(+), 45 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 491f86a8f..be0e2009d 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -1140,6 +1140,7 @@ void TupleAggregateStep::prep1PhaseAggregate( // populate the aggregate rowgroup AGG_MAP aggFuncMap; + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -1157,8 +1158,9 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(ti.dtype); widthAgg.push_back(ti.width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - aggOp, stats, 
0, i, jobInfo.cntStarPos)); + aggOp, stats, 0, outIdx, jobInfo.cntStarPos)); functionVec.push_back(funct); + ++outIdx; continue; } @@ -1174,9 +1176,10 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(ti.dtype); widthAgg.push_back(width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - aggOp, stats, 0, i, -1)); + aggOp, stats, 0, outIdx, -1)); functionVec.push_back(funct); + ++outIdx; continue; } @@ -1222,16 +1225,17 @@ void TupleAggregateStep::prep1PhaseAggregate( widthAgg.push_back(width[colProj]); if (groupBy[it->second]->fOutputColumnIndex == (uint32_t) - 1) - groupBy[it->second]->fOutputColumnIndex = i; + groupBy[it->second]->fOutputColumnIndex = outIdx; else functionVec.push_back(SP_ROWAGG_FUNC_t( new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, - i, + outIdx, groupBy[it->second]->fOutputColumnIndex))); + ++outIdx; continue; } else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), key) != @@ -1244,6 +1248,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(ti.precision); typeAgg.push_back(ti.dtype); widthAgg.push_back(ti.width); + ++outIdx; continue; } else if (jobInfo.groupConcatInfo.columns().find(key) != @@ -1256,6 +1261,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + ++outIdx; continue; } else if (jobInfo.windowSet.find(key) != jobInfo.windowSet.end()) @@ -1267,6 +1273,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + ++outIdx; continue; } else @@ -1296,7 +1303,7 @@ void TupleAggregateStep::prep1PhaseAggregate( { pUDAFFunc = udafc->getContext().getFunction(); // Create a RowAggFunctionCol (UDAF subtype) with the context. 
- funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, outIdx)); break; } } @@ -1307,7 +1314,7 @@ void TupleAggregateStep::prep1PhaseAggregate( } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, outIdx)); } functionVec.push_back(funct); @@ -1536,6 +1543,11 @@ void TupleAggregateStep::prep1PhaseAggregate( { aggFuncMap.insert(make_pair(boost::make_tuple(key, aggOp, pUDAFFunc), funct->fOutputColumnIndex)); } + + if (aggOp != ROWAGG_MULTI_PARM) + { + ++outIdx; + } } // now fix the AVG function, locate the count(column) position @@ -1687,7 +1699,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); // map key = column key, operation (enum), and UDAF pointer if UDAF. AGG_MAP aggFuncMap; - set avgSet; +// set avgSet; + list multiParmIndexes; // fOR udaf UDAFColumn* udafc = NULL; @@ -1842,9 +1855,9 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // skip sum / count(column) if avg is also selected - if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && - (avgSet.find(aggKey) != avgSet.end())) - continue; +// if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && +// (avgSet.find(aggKey) != avgSet.end())) +// continue; if (aggOp == ROWAGG_DISTINCT_SUM || aggOp == ROWAGG_DISTINCT_AVG || @@ -2080,7 +2093,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); ++colAgg; - // UDAF Dummy holder for UserData struct + // Column for index of UDAF UserData struct oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(aggKey); scaleAgg.push_back(0); @@ -2107,6 +2120,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); 
widthAgg.push_back(widthProj[colProj]); + multiParmIndexes.push_back(colAgg); ++colAgg; // If the param is const ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); @@ -2154,7 +2168,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. AGG_MAP aggDupFuncMap; - pUDAFFunc = NULL; + projColsUDAFIdx = 0; + int64_t multiParms = 0; // copy over the groupby vector // update the outputColumnIndex if returned @@ -2165,8 +2180,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - projColsUDAFIdx = 0; // locate the return column position in aggregated rowgroup + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { udafc = NULL; @@ -2176,23 +2191,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colAgg = -1; - if (aggOp == ROWAGG_UDAF) + if (aggOp == ROWAGG_MULTI_PARM) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; - for (; it != jobInfo.projectionCols.end(); it++) - { - udafc = dynamic_cast((*it).get()); - projColsUDAFIdx++; - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - break; - } - } - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); - } + // Skip on final agg.: Extra parms for an aggregate have no work there. 
+ ++multiParms; + continue; } if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -2220,6 +2223,25 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } } + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } + } + switch (aggOp) { case ROWAGG_DISTINCT_AVG: @@ -2470,7 +2492,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep1PhaseDistinctAggregate: " << emsg << " oid=" @@ -2494,7 +2516,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -2504,7 +2526,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (dupGroupbyIndex != -1) functionVec2.push_back(SP_ROWAGG_FUNC_t( new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -2512,11 +2534,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, outIdx)); 
} else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -2553,6 +2575,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -2570,7 +2593,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -2785,6 +2808,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. 
+ int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -2792,6 +2820,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } if (returnedColVec[k].first != distinctColKey) continue; @@ -2812,7 +2845,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -2831,9 +2864,15 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } // search non-distinct functions in functionVec vector::iterator it = functionVec2.begin(); @@ -2849,7 +2888,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if ((f->fOutputColumnIndex == k) && @@ -2871,7 +2910,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -4272,9 +4311,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fOutputColumnIndex, + udafFuncCol->fOutputColumnIndex-multiParms, udafFuncCol->fAuxColumnIndex-multiParms));
functionNoDistVec.push_back(funct); + pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); } else { @@ -4282,9 +4322,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fAggFunction, funcPm->fStatsFunction, funcPm->fOutputColumnIndex, - funcPm->fOutputColumnIndex, + funcPm->fOutputColumnIndex-multiParms, funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); + pUDAFFunc = NULL; } } @@ -4500,6 +4541,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( precisionAggDist.push_back(precisionAggUm[colUm]); typeAggDist.push_back(typeAggUm[colUm]); widthAggDist.push_back(widthAggUm[colUm]); + colUm -= multiParms; } // not a direct hit -- a returned column is not already in the RG from PMs @@ -4536,8 +4578,16 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(retKey); scaleAggDist.push_back(0); - precisionAggDist.push_back(19); - typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + if (isUnsigned(typeAggUm[colUm])) + { + precisionAggDist.push_back(20); + typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + } + else + { + precisionAggDist.push_back(19); + typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + } widthAggDist.push_back(bigIntWidth); } } diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 6339554f1..bead74aff 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -4284,14 +4284,14 @@ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, static_any::any valOut; // Get the user data - boost::shared_ptr userData = rowIn.getUserData(colIn + 1); + boost::shared_ptr userDataIn = rowIn.getUserData(colIn+1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. 
uint32_t flags[1]; flags[0] = 0; - if (!userData) + if (!userDataIn) { if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { @@ -4309,7 +4309,7 @@ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, // Call the UDAF subEvaluate method mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userData.get()); + rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userDataIn.get()); fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) From 6fa7dded6fb9c9ebfc50a244c664d9246c0b8578 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:50:10 -0500 Subject: [PATCH 032/123] MCOL-1201 manual rebase with develop. Obsoletes branch MCOL-1201 --- dbcon/execplan/aggregatecolumn.cpp | 96 +-- dbcon/execplan/aggregatecolumn.h | 44 +- dbcon/joblist/expressionstep.cpp | 12 +- dbcon/joblist/expressionstep.h | 1 + dbcon/joblist/groupconcat.cpp | 2 +- dbcon/joblist/joblistfactory.cpp | 531 ++++++++---- dbcon/joblist/tupleaggregatestep.cpp | 280 +++++-- dbcon/mysql/ha_calpont_execplan.cpp | 858 +++++++++++--------- dbcon/mysql/ha_calpont_impl.cpp | 9 +- dbcon/mysql/ha_window_function.cpp | 37 +- utils/common/any.hpp | 270 +++--- utils/rowgroup/rowaggregation.cpp | 605 +++++++++----- utils/rowgroup/rowaggregation.h | 29 +- utils/udfsdk/CMakeLists.txt | 2 +- utils/udfsdk/allnull.cpp | 7 +- utils/udfsdk/allnull.h | 4 +- utils/udfsdk/avg_mode.cpp | 14 +- utils/udfsdk/avg_mode.h | 14 +- utils/udfsdk/mcsv1_udaf.cpp | 13 +- utils/udfsdk/mcsv1_udaf.h | 88 +- utils/udfsdk/median.cpp | 14 +- utils/udfsdk/median.h | 8 +- utils/udfsdk/ssq.cpp | 14 +- utils/udfsdk/ssq.h | 8 +- utils/udfsdk/udfmysql.cpp | 162 ++++ utils/udfsdk/udfsdk.vpj | 4 + utils/windowfunction/wf_udaf.cpp | 280 +++++-- utils/windowfunction/wf_udaf.h | 27 +- utils/windowfunction/windowfunctiontype.cpp | 8 +- writeengine/wrapper/writeengine.cpp | 10 +- 30 files changed, 2255 insertions(+), 1196 deletions(-) diff --git 
a/dbcon/execplan/aggregatecolumn.cpp b/dbcon/execplan/aggregatecolumn.cpp index 18cba2607..5bce12d79 100644 --- a/dbcon/execplan/aggregatecolumn.cpp +++ b/dbcon/execplan/aggregatecolumn.cpp @@ -98,36 +98,6 @@ AggregateColumn::AggregateColumn(const uint32_t sessionID): { } -AggregateColumn::AggregateColumn(const AggOp aggOp, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - -AggregateColumn::AggregateColumn(const AggOp aggOp, const string& content, const uint32_t sessionID): - ReturnedColumn(sessionID), - fAggOp(aggOp), - fAsc(false), - fData(aggOp + "(" + content + ")") -{ - // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); -} - -// deprecated constructor. use function name as string -AggregateColumn::AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID): - ReturnedColumn(sessionID), - fFunctionName(functionName), - fAggOp(NOOP), - fAsc(false), - fData(functionName + "(" + parm->data() + ")") -{ - fFunctionParms.reset(parm); -} - // deprecated constructor. 
use function name as string AggregateColumn::AggregateColumn(const string& functionName, const string& content, const uint32_t sessionID): ReturnedColumn(sessionID), @@ -137,20 +107,21 @@ AggregateColumn::AggregateColumn(const string& functionName, const string& conte fData(functionName + "(" + content + ")") { // TODO: need to handle distinct - fFunctionParms.reset(new ArithmeticColumn(content)); + SRCP srcp(new ArithmeticColumn(content)); + fAggParms.push_back(srcp); } AggregateColumn::AggregateColumn( const AggregateColumn& rhs, const uint32_t sessionID ): ReturnedColumn(rhs, sessionID), fFunctionName (rhs.fFunctionName), fAggOp(rhs.fAggOp), - fFunctionParms(rhs.fFunctionParms), fTableAlias(rhs.tableAlias()), fAsc(rhs.asc()), fData(rhs.data()), fConstCol(rhs.fConstCol) { fAlias = rhs.alias(); + fAggParms = rhs.fAggParms; } /** @@ -166,10 +137,14 @@ const string AggregateColumn::toString() const if (fAlias.length() > 0) output << "/Alias: " << fAlias << endl; - if (fFunctionParms == 0) - output << "No arguments" << endl; + if (fAggParms.size() == 0) + output << "No arguments"; else - output << *fFunctionParms << endl; + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + output << *(fAggParms[i]) << " "; + } + output << endl; if (fConstCol) output << *fConstCol; @@ -191,10 +166,11 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const b << fFunctionName; b << static_cast(fAggOp); - if (fFunctionParms == 0) - b << (uint8_t) ObjectReader::NULL_CLASS; - else - fFunctionParms->serialize(b); + b << static_cast(fAggParms.size()); + for (uint32_t i = 0; i < fAggParms.size(); ++i) + { + fAggParms[i]->serialize(b); + } b << static_cast(fGroupByColList.size()); @@ -219,20 +195,26 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const void AggregateColumn::unserialize(messageqcpp::ByteStream& b) { - ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); - fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); - 
fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); - ReturnedColumn::unserialize(b); - b >> fFunctionName; - b >> fAggOp; - //delete fFunctionParms; - fFunctionParms.reset( - dynamic_cast(ObjectReader::createTreeNode(b))); - messageqcpp::ByteStream::quadbyte size; messageqcpp::ByteStream::quadbyte i; ReturnedColumn* rc; + ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN); + fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end()); + fProjectColList.erase(fProjectColList.begin(), fProjectColList.end()); + fAggParms.erase(fAggParms.begin(), fAggParms.end()); + ReturnedColumn::unserialize(b); + b >> fFunctionName; + b >> fAggOp; + + b >> size; + for (i = 0; i < size; i++) + { + rc = dynamic_cast(ObjectReader::createTreeNode(b)); + SRCP srcp(rc); + fAggParms.push_back(srcp); + } + b >> size; for (i = 0; i < size; i++) @@ -261,6 +243,7 @@ void AggregateColumn::unserialize(messageqcpp::ByteStream& b) bool AggregateColumn::operator==(const AggregateColumn& t) const { const ReturnedColumn* rc1, *rc2; + AggParms::const_iterator it, it2; rc1 = static_cast(this); rc2 = static_cast(&t); @@ -277,16 +260,18 @@ bool AggregateColumn::operator==(const AggregateColumn& t) const if (fAggOp != t.fAggOp) return false; - if (fFunctionParms.get() != NULL && t.fFunctionParms.get() != NULL) + if (aggParms().size() != t.aggParms().size()) { - if (*fFunctionParms.get() != t.fFunctionParms.get()) + return false; + } + for (it = fAggParms.begin(), it2 = t.fAggParms.begin(); + it != fAggParms.end(); + ++it, ++it2) + { + if (**it != **it2) return false; } - else if (fFunctionParms.get() != NULL || t.fFunctionParms.get() != NULL) - return false; - //if (fAlias != t.fAlias) - // return false; if (fTableAlias != t.fTableAlias) return false; @@ -645,3 +630,4 @@ AggregateColumn::AggOp AggregateColumn::agname2num(const string& agname) } } // namespace execplan + diff --git a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h index 
d1db7e5a4..b0884f179 100644 --- a/dbcon/execplan/aggregatecolumn.h +++ b/dbcon/execplan/aggregatecolumn.h @@ -40,6 +40,8 @@ class ByteStream; namespace execplan { +typedef std::vector AggParms; + /** * @brief A class to represent a aggregate return column * @@ -74,7 +76,8 @@ public: BIT_OR, BIT_XOR, GROUP_CONCAT, - UDAF + UDAF, + MULTI_PARM }; /** @@ -94,21 +97,6 @@ public: */ AggregateColumn(const uint32_t sessionID); - /** - * ctor - */ - AggregateColumn(const AggOp aggop, ReturnedColumn* parm, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const AggOp aggop, const std::string& content, const uint32_t sessionID = 0); - - /** - * ctor - */ - AggregateColumn(const std::string& functionName, ReturnedColumn* parm, const uint32_t sessionID = 0); - /** * ctor */ @@ -155,24 +143,27 @@ public: fAggOp = aggOp; } + /** get function parms - * - * set the function parms from this object */ - virtual const SRCP functionParms() const + virtual AggParms& aggParms() { - return fFunctionParms; + return fAggParms; + } + + virtual const AggParms& aggParms() const + { + return fAggParms; } /** set function parms - * - * set the function parms for this object */ - virtual void functionParms(const SRCP& functionParms) + virtual void aggParms(const AggParms& parms) { - fFunctionParms = functionParms; + fAggParms = parms; } + /** return a copy of this pointer * * deep copy of this pointer and return the copy @@ -325,9 +316,10 @@ protected: uint8_t fAggOp; /** - * A ReturnedColumn objects that are the arguments to this function + * ReturnedColumn objects that are the arguments to this + * function */ - SRCP fFunctionParms; + AggParms fAggParms; /** table alias * A string to represent table alias name which contains this column diff --git a/dbcon/joblist/expressionstep.cpp b/dbcon/joblist/expressionstep.cpp index 0e064c359..4a8a14ff3 100644 --- a/dbcon/joblist/expressionstep.cpp +++ b/dbcon/joblist/expressionstep.cpp @@ -56,6 +56,17 @@ using namespace rowgroup; 
namespace joblist { +ExpressionStep::ExpressionStep() : + fExpressionFilter(NULL), + fExpressionId(-1), + fVarBinOK(false), + fSelectFilter(false), + fAssociatedJoinId(0), + fDoJoin(false), + fVirtual(false) +{ +} + ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : JobStep(jobInfo), fExpressionFilter(NULL), @@ -68,7 +79,6 @@ ExpressionStep::ExpressionStep(const JobInfo& jobInfo) : { } - ExpressionStep::ExpressionStep(const ExpressionStep& rhs) : JobStep(rhs), fExpression(rhs.expression()), diff --git a/dbcon/joblist/expressionstep.h b/dbcon/joblist/expressionstep.h index 4a069440f..63423fc7d 100644 --- a/dbcon/joblist/expressionstep.h +++ b/dbcon/joblist/expressionstep.h @@ -50,6 +50,7 @@ class ExpressionStep : public JobStep { public: // constructors + ExpressionStep(); ExpressionStep(const JobInfo&); // destructor constructors virtual ~ExpressionStep(); diff --git a/dbcon/joblist/groupconcat.cpp b/dbcon/joblist/groupconcat.cpp index 234fc0a8e..afc91a2ec 100644 --- a/dbcon/joblist/groupconcat.cpp +++ b/dbcon/joblist/groupconcat.cpp @@ -78,7 +78,7 @@ void GroupConcatInfo::prepGroupConcat(JobInfo& jobInfo) while (i != jobInfo.groupConcatCols.end()) { GroupConcatColumn* gcc = dynamic_cast(i->get()); - const RowColumn* rcp = dynamic_cast(gcc->functionParms().get()); + const RowColumn* rcp = dynamic_cast(gcc->aggParms()[0].get()); SP_GroupConcat groupConcat(new GroupConcat); groupConcat->fSeparator = gcc->separator(); diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index a48ecd13a..4cf7bccc5 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -18,7 +18,6 @@ // $Id: joblistfactory.cpp 9632 2013-06-18 22:18:20Z xlou $ - #include #include #include @@ -870,7 +869,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo if (gcc != NULL) { - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rcp = dynamic_cast(srcp.get()); const vector& cols = 
rcp->columnVec(); @@ -891,21 +890,55 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } +#if 0 + // MCOL-1201 Add support for multi-parameter UDAnF + UDAFColumn* udafc = dynamic_cast(retCols[i].get()); + if (udafc != NULL) + { + srcp = udafc->aggParms()[0]; + const RowColumn* rcp = dynamic_cast(srcp.get()); + const vector& cols = rcp->columnVec(); + for (vector::const_iterator j = cols.begin(); j != cols.end(); j++) + { + srcp = *j; + if (dynamic_cast(srcp.get()) == NULL) + retCols.push_back(srcp); + + // Do we need this? + const ArithmeticColumn* ac = dynamic_cast(srcp.get()); + const FunctionColumn* fc = dynamic_cast(srcp.get()); + if (ac != NULL || fc != NULL) + { + // bug 3728, make a dummy expression step for each expression. + scoped_ptr es(new ExpressionStep(jobInfo)); + es->expression(srcp, jobInfo); + } + } + continue; + } +#endif srcp = retCols[i]; const AggregateColumn* ag = dynamic_cast(retCols[i].get()); - - if (ag != NULL) - srcp = ag->functionParms(); - - const ArithmeticColumn* ac = dynamic_cast(srcp.get()); - const FunctionColumn* fc = dynamic_cast(srcp.get()); - - if (ac != NULL || fc != NULL) + // bug 3728 Make a dummy expression for srcp if it is an + // expression. This is needed to fill in some stuff. + // Note that es.expression does nothing if the item is not an expression. + if (ag == NULL) { - // bug 3728, make a dummy expression step for each expression. - scoped_ptr es(new ExpressionStep(jobInfo)); - es->expression(srcp, jobInfo); + // Not an aggregate. Make a dummy expression for the item + ExpressionStep es; + es.expression(srcp, jobInfo); + } + else + { + // MCOL-1201 multi-argument aggregate. make a dummy expression + // step for each argument that is an expression. 
+ for (uint32_t i = 0; i < ag->aggParms().size(); ++i) + { + srcp = ag->aggParms()[i]; + ExpressionStep es; + es.expression(srcp, jobInfo); + } } } @@ -915,17 +948,18 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo { srcp = retCols[i]; const SimpleColumn* sc = dynamic_cast(srcp.get()); + AggregateColumn* aggc = dynamic_cast(srcp.get()); bool doDistinct = (csep->distinct() && csep->groupByCols().empty()); uint32_t tupleKey = -1; string alias; string view; - // returned column could be groupby column, a simplecoulumn not a agregatecolumn + // returned column could be groupby column, a simplecoulumn not an aggregatecolumn int op = 0; CalpontSystemCatalog::OID dictOid = 0; CalpontSystemCatalog::ColType ct, aggCt; - if (sc == NULL) + if (aggc) { GroupConcatColumn* gcc = dynamic_cast(retCols[i].get()); @@ -939,7 +973,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo tupleKey = ti.key; jobInfo.returnedColVec.push_back(make_pair(tupleKey, gcc->aggOp())); // not a tokenOnly column. 
Mark all the columns involved - srcp = gcc->functionParms(); + srcp = gcc->aggParms()[0]; const RowColumn* rowCol = dynamic_cast(srcp.get()); if (rowCol) @@ -963,186 +997,353 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } - - AggregateColumn* ac = dynamic_cast(retCols[i].get()); - - if (ac != NULL) + else { - srcp = ac->functionParms(); - sc = dynamic_cast(srcp.get()); + // Aggregate column not group concat + AggParms& aggParms = aggc->aggParms(); - if (ac->constCol().get() != NULL) + for (uint32_t parm = 0; parm < aggParms.size(); ++parm) { - // replace the aggregate on constant with a count(*) - SRCP clone; - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) + if (aggc->constCol().get() != NULL) { - clone.reset(new UDAFColumn(*udafc, ac->sessionID())); + // replace the aggregate on constant with a count(*) + SRCP clone; + UDAFColumn* udafc = dynamic_cast(aggc); + + if (udafc) + { + clone.reset(new UDAFColumn(*udafc, aggc->sessionID())); + } + else + { + clone.reset(new AggregateColumn(*aggc, aggc->sessionID())); + } + + jobInfo.constAggregate.insert(make_pair(i, clone)); + aggc->aggOp(AggregateColumn::COUNT_ASTERISK); + aggc->distinct(false); + } + + srcp = aggParms[parm]; + sc = dynamic_cast(srcp.get()); + if (parm == 0) + { + op = aggc->aggOp(); } else { - clone.reset(new AggregateColumn(*ac, ac->sessionID())); + op = AggregateColumn::MULTI_PARM; + } + doDistinct = aggc->distinct(); + if (aggParms.size() == 1) + { + // Set the col type based on the single parm. + // Changing col type based on a parm if multiple parms + // doesn't really make sense. + updateAggregateColType(aggc, srcp, op, jobInfo); + } + aggCt = aggc->resultType(); + + // As of bug3695, make sure varbinary is not used in aggregation. 
+ // TODO: allow for UDAF + if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) + throw runtime_error ("VARBINARY in aggregate function is not supported."); + + // Project the parm columns or expressions + if (sc != NULL) + { + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. + if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. 
+ else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } + } + else + { + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - jobInfo.constAggregate.insert(make_pair(i, clone)); - ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ac->distinct(false); - } + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - op = ac->aggOp(); - doDistinct = ac->distinct(); - updateAggregateColType(ac, srcp, op, jobInfo); - aggCt = ac->resultType(); + if (keyIt == projectKeys.end()) + { + 
RetColsVector::iterator it = pcv.end(); - // As of bug3695, make sure varbinary is not used in aggregation. - if (sc != NULL && sc->resultType().colDataType == CalpontSystemCatalog::VARBINARY) - throw runtime_error ("VARBINARY in aggregate function is not supported."); - } - } + if (doDistinct) + it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // simple column selected or aggregated - if (sc != NULL) - { - // one column only need project once - CalpontSystemCatalog::OID retOid = sc->oid(); - CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); - alias = extractTableAlias(sc); - view = sc->viewName(); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); - if (!sc->schemaName().empty()) - { - ct = sc->colType(); + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } -//XXX use this before connector sets colType in sc correctly. 
- if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) - ct = jobInfo.csc->colType(sc->oid()); + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; -//X - dictOid = isDictCol(ct); - } - else - { - retOid = (tblOid + 1) + sc->colPosition(); - ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; - } + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); - TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); - tupleKey = ti.key; + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; - // this is a string column - if (dictOid > 0) - { - map::iterator findit = jobInfo.tokenOnly.find(tupleKey); - - // if the column has never seen, and the op is count: possible need count only. - if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) - { - if (findit == jobInfo.tokenOnly.end()) - jobInfo.tokenOnly[tupleKey] = true; - } - // if aggregate other than count, token is not enough. 
- else if (op != 0 || doDistinct) - { - jobInfo.tokenOnly[tupleKey] = false; - } - - findit = jobInfo.tokenOnly.find(tupleKey); - - if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) - { - dictMap[tupleKey] = dictOid; - jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; - ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); - jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } } else { - const ArithmeticColumn* ac = NULL; - const FunctionColumn* fc = NULL; - const WindowFunctionColumn* wc = NULL; - bool hasAggCols = false; - - if ((ac = dynamic_cast(srcp.get())) != NULL) + // Not an Aggregate + // simple column selected + if (sc != NULL) { - if (ac->aggColumnList().size() > 0) - hasAggCols = true; + // one column only need project once + CalpontSystemCatalog::OID retOid = sc->oid(); + CalpontSystemCatalog::OID tblOid = tableOid(sc, jobInfo.csc); + alias = extractTableAlias(sc); + view = sc->viewName(); + + if (!sc->schemaName().empty()) + { + ct = sc->colType(); + + //XXX use this before connector sets colType in sc correctly. + if (sc->isInfiniDB() && dynamic_cast(sc) == NULL) + ct = jobInfo.csc->colType(sc->oid()); + + //X + dictOid = isDictCol(ct); + } + else + { + retOid = (tblOid + 1) + sc->colPosition(); + ct = jobInfo.vtableColTypes[UniqId(retOid, alias, "", "")]; + } + + TupleInfo ti(setTupleInfo(ct, retOid, jobInfo, tblOid, sc, alias)); + tupleKey = ti.key; + + // this is a string column + if (dictOid > 0) + { + map::iterator findit = jobInfo.tokenOnly.find(tupleKey); + + // if the column has never seen, and the op is count: possible need count only. 
+ if (AggregateColumn::COUNT == op || AggregateColumn::COUNT_ASTERISK == op) + { + if (findit == jobInfo.tokenOnly.end()) + jobInfo.tokenOnly[tupleKey] = true; + } + // if aggregate other than count, token is not enough. + else if (op != 0 || doDistinct) + { + jobInfo.tokenOnly[tupleKey] = false; + } + + findit = jobInfo.tokenOnly.find(tupleKey); + + if (!(findit != jobInfo.tokenOnly.end() && findit->second == true)) + { + dictMap[tupleKey] = dictOid; + jobInfo.keyInfo->dictOidToColOid[dictOid] = retOid; + ti = setTupleInfo(ct, dictOid, jobInfo, tblOid, sc, alias); + jobInfo.keyInfo->dictKeyMap[tupleKey] = ti.key; + } + } } - else if ((fc = dynamic_cast(srcp.get())) != NULL) - { - if (fc->aggColumnList().size() > 0) - hasAggCols = true; - } - else if (dynamic_cast(srcp.get()) != NULL) - { - std::ostringstream errmsg; - errmsg << "Invalid aggregate function nesting."; - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - else if ((wc = dynamic_cast(srcp.get())) == NULL) - { - std::ostringstream errmsg; - errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); - cerr << boldStart << errmsg.str() << boldStop << endl; - throw logic_error(errmsg.str()); - } - - uint64_t eid = srcp.get()->expressionId(); - ct = srcp.get()->resultType(); - TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); - tupleKey = ti.key; - - if (hasAggCols) - jobInfo.expressionVec.push_back(tupleKey); - } - - // add to project list - vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - - if (keyIt == projectKeys.end()) - { - RetColsVector::iterator it = pcv.end(); - - if (doDistinct) - it = pcv.insert(pcv.begin() + lastGroupByPos++, srcp); else - it = pcv.insert(pcv.end(), srcp); - - projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); - } - else if (doDistinct) // @bug4250, move forward distinct column if necessary. 
- { - uint32_t pos = distance(projectKeys.begin(), keyIt); - - if (pos >= lastGroupByPos) { - pcv[pos] = pcv[lastGroupByPos]; - pcv[lastGroupByPos] = srcp; - projectKeys[pos] = projectKeys[lastGroupByPos]; - projectKeys[lastGroupByPos] = tupleKey; - lastGroupByPos++; + const ArithmeticColumn* ac = NULL; + const FunctionColumn* fc = NULL; + const WindowFunctionColumn* wc = NULL; + bool hasAggCols = false; + + if ((ac = dynamic_cast(srcp.get())) != NULL) + { + if (ac->aggColumnList().size() > 0) + hasAggCols = true; + } + else if ((fc = dynamic_cast(srcp.get())) != NULL) + { + if (fc->aggColumnList().size() > 0) + hasAggCols = true; + } + else if (dynamic_cast(srcp.get()) != NULL) + { + std::ostringstream errmsg; + errmsg << "Invalid aggregate function nesting."; + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + else if (dynamic_cast(srcp.get()) != NULL) + { + } + else if ((wc = dynamic_cast(srcp.get())) == NULL) + { + std::ostringstream errmsg; + errmsg << "doAggProject: unsupported column: " << typeid(*(srcp.get())).name(); + cerr << boldStart << errmsg.str() << boldStop << endl; + throw logic_error(errmsg.str()); + } + + uint64_t eid = srcp.get()->expressionId(); + ct = srcp.get()->resultType(); + TupleInfo ti(setExpTupleInfo(ct, eid, srcp.get()->alias(), jobInfo)); + tupleKey = ti.key; + + if (hasAggCols) + jobInfo.expressionVec.push_back(tupleKey); } - } - if (doDistinct && dictOid > 0) - tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + // add to project list + vector::iterator keyIt = find(projectKeys.begin(), projectKeys.end(), tupleKey); - // remember the columns to be returned - jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + if (keyIt == projectKeys.end()) + { + RetColsVector::iterator it = pcv.end(); - if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) - jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + if (doDistinct) + it = pcv.insert(pcv.begin() + 
lastGroupByPos++, srcp); + else + it = pcv.insert(pcv.end(), srcp); - // bug 1499 distinct processing, save unique distinct columns - if (doDistinct && - (jobInfo.distinctColVec.end() == - find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) - { - jobInfo.distinctColVec.push_back(tupleKey); + projectKeys.insert(projectKeys.begin() + distance(pcv.begin(), it), tupleKey); + } + else if (doDistinct) // @bug4250, move forward distinct column if necessary. + { + uint32_t pos = distance(projectKeys.begin(), keyIt); + + if (pos >= lastGroupByPos) + { + pcv[pos] = pcv[lastGroupByPos]; + pcv[lastGroupByPos] = srcp; + projectKeys[pos] = projectKeys[lastGroupByPos]; + projectKeys[lastGroupByPos] = tupleKey; + lastGroupByPos++; + } + } + + if (doDistinct && dictOid > 0) + tupleKey = jobInfo.keyInfo->dictKeyMap[tupleKey]; + + // remember the columns to be returned + jobInfo.returnedColVec.push_back(make_pair(tupleKey, op)); + + if (op == AggregateColumn::AVG || op == AggregateColumn::DISTINCT_AVG) + jobInfo.scaleOfAvg[tupleKey] = (ct.scale << 8) + aggCt.scale; + + // bug 1499 distinct processing, save unique distinct columns + if (doDistinct && + (jobInfo.distinctColVec.end() == + find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), tupleKey))) + { + jobInfo.distinctColVec.push_back(tupleKey); + } } } diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 9e23ac17b..ff490da5b 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -164,6 +164,9 @@ inline RowAggFunctionType functionIdMap(int planFuncId) case AggregateColumn::UDAF: return ROWAGG_UDAF; + case AggregateColumn::MULTI_PARM: + return ROWAGG_MULTI_PARM; + default: return ROWAGG_FUNCT_UNDEFINE; } @@ -1302,7 +1305,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no/not enough 
UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -1468,7 +1471,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); @@ -1483,6 +1486,17 @@ void TupleAggregateStep::prep1PhaseAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(key); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(width[colProj]); + } + break; + default: { ostringstream emsg; @@ -1560,7 +1574,7 @@ void TupleAggregateStep::prep1PhaseAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(3)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec[i]->fAuxColumnIndex = lastCol++; @@ -1675,7 +1689,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation map projColPosMap; @@ -1848,7 +1862,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); 
} } else @@ -2043,7 +2057,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep1PhaseDistinctAggregate A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -2065,6 +2079,18 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(aggKey); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + widthAgg.push_back(widthProj[colProj]); + ++colAgg; + } + break; + default: { ostringstream emsg; @@ -2111,7 +2137,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( groupByNoDist.push_back(groupby); aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - + + projColsUDAFIndex = 0; // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2121,6 +2148,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colAgg = -1; + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -2432,11 +2467,37 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + // update the aggregate function vector + SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = 
jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } + } + else + { + funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + } if (aggOp == ROWAGG_COUNT_NO_OP) funct->fAuxColumnIndex = colAgg; @@ -2549,7 +2610,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep1PhaseDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVec2[i]->fAuxColumnIndex = lastCol++; @@ -2893,7 +2954,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3036,12 +3097,11 @@ void TupleAggregateStep::prep2PhasesAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); break; } - } if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -3240,7 +3300,7 @@ void 
TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } oidsAggPm.push_back(oidsProj[colProj]); @@ -3261,6 +3321,18 @@ void TupleAggregateStep::prep2PhasesAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -3278,11 +3350,16 @@ void TupleAggregateStep::prep2PhasesAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap; AGG_MAP aggDupFuncMap; + projColsUDAFIndex = 0; // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3299,7 +3376,14 @@ void TupleAggregateStep::prep2PhasesAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colPm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } // Is this a UDAF? use the function as part of the key. 
+ mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; if (aggOp == ROWAGG_UDAF) @@ -3452,20 +3536,36 @@ void TupleAggregateStep::prep2PhasesAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3517,7 +3617,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3545,7 +3645,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -3691,6 +3791,7 
@@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector groupByPm, groupByUm, groupByNoDist; vector functionVecPm, functionNoDistVec, functionVecUm; + list multiParmIndexes; uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; @@ -3702,7 +3803,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // the groupby columns are put in front, even not a returned column // sum and count(column name) are omitted, if avg present { - // project only uniq oids, but they may be repeated in aggregation + // project only unique oids, but they may be repeated in aggregation // collect the projected column info, prepare for aggregation vector width; map projColPosMap; @@ -3856,7 +3957,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it == jobInfo.projectionCols.end()) { - throw logic_error("prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); } } else @@ -4050,7 +4151,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(2)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } // Return column @@ -4072,6 +4173,19 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } + case ROWAGG_MULTI_PARM: + { + oidsAggPm.push_back(oidsProj[colProj]); + keysAggPm.push_back(aggKey); + scaleAggPm.push_back(scaleProj[colProj]); + precisionAggPm.push_back(precisionProj[colProj]); + typeAggPm.push_back(typeProj[colProj]); + widthAggPm.push_back(width[colProj]); + multiParmIndexes.push_back(colAggPm); + colAggPm++; + } + break; + default: { ostringstream emsg; @@ -4093,12 +4207,23 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( groupByUm.push_back(groupby); } + // Keep a count of the parms 
after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) + { SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; // UDAF support + if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) + { + // Multi-Parm is not used on the UM + ++multiParms; + continue; + } if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); @@ -4106,7 +4231,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } else @@ -4116,18 +4241,25 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fStatsFunction, funcPm->fOutputColumnIndex, funcPm->fOutputColumnIndex, - funcPm->fAuxColumnIndex)); + funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); } } - posAggUm = posAggPm; - oidsAggUm = oidsAggPm; - keysAggUm = keysAggPm; - scaleAggUm = scaleAggPm; - precisionAggUm = precisionAggPm; - widthAggUm = widthAggPm; - typeAggUm = typeAggPm; + // Copy over the PM arrays to the UM. Skip any that are a multi-parm entry. 
+ for (uint32_t idx = 0; idx < oidsAggPm.size(); ++idx) + { + if (find (multiParmIndexes.begin(), multiParmIndexes.end(), idx ) != multiParmIndexes.end()) + { + continue; + } + oidsAggUm.push_back(oidsAggPm[idx]); + keysAggUm.push_back(keysAggPm[idx]); + scaleAggUm.push_back(scaleAggPm[idx]); + precisionAggUm.push_back(precisionAggPm[idx]); + widthAggUm.push_back(widthAggPm[idx]); + typeAggUm.push_back(typeAggPm[idx]); + } } @@ -4137,6 +4269,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4159,6 +4295,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colUm = -1; + if (aggOp == ROWAGG_MULTI_PARM) + { + // Skip on UM: Extra parms for an aggregate have no work on the UM + ++multiParms; + continue; + } + + if (aggOp == ROWAGG_UDAF) + { + UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + + if (udafc) + pUDAFFunc = udafc->getContext().getFunction(); + } + if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { @@ -4285,7 +4436,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second; + colUm = it->second - multiParms; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); @@ -4309,7 +4460,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - 
colUm = it->second; + colUm = it->second - multiParms; if (aggOp == ROWAGG_SUM) { @@ -4412,21 +4563,36 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); } - - // update the aggregate function vector else { + // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - pUDAFFunc = udafc->getContext().getFunction(); - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i)); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + + for (; it != jobInfo.projectionCols.end(); it++) + { + UDAFColumn* udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + // Create a RowAggFunctionCol (UDAF subtype) with the context. + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms)); + break; + } + } + + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + } } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4480,7 +4646,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = returnedColVec.size() - multiParms; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4540,7 +4706,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(9)A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called 
but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; @@ -4687,6 +4853,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. + int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -4694,6 +4865,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) /* extra parm of a multi-parm aggregate: counted and skipped */ + { + ++multiParms; + continue; + } if (returnedColVec[k].first != distinctColKey) continue; @@ -4715,7 +4891,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -4732,9 +4908,15 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } // search non-distinct functions in functionVec vector::iterator it = functionVecUm.begin(); @@ -4752,7 +4934,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK || @@ -4773,7 +4955,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( 
f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 271508f42..9150d5393 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4038,6 +4038,10 @@ ParseTree* buildParseTree(Item_func* item, gp_walk_info& gwi, bool& nonSupport) ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { + // MCOL-1201 For UDAnF multiple parameters + vector selCols; + vector orderCols; + if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -4054,6 +4058,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // N.B. argument_count() is the # of formal parms to the agg fcn. InifniDB only supports 1 argument // TODO: Support more than one parm +#if 0 if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC && isp->sum_func() != Item_sum::UDF_SUM_FUNC) { @@ -4061,7 +4066,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG); return NULL; } - +#endif AggregateColumn* ac = NULL; if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) @@ -4084,444 +4089,509 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { gwi.fatalParseError = true; gwi.parseErrorText = "Non supported aggregate type on the select clause"; + if (ac) + delete ac; return NULL; } - // special parsing for group_concat - if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + try { - Item_func_group_concat* gc = (Item_func_group_concat*)isp; + + // special parsing for group_concat + if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + Item_func_group_concat* gc = (Item_func_group_concat*)isp; vector orderCols; - RowColumn* rowCol = new RowColumn(); + 
RowColumn* rowCol = new RowColumn(); vector selCols; - uint32_t select_ctn = gc->count_field(); - ReturnedColumn* rc = NULL; + uint32_t select_ctn = gc->count_field(); + ReturnedColumn* rc = NULL; - for (uint32_t i = 0; i < select_ctn; i++) - { - rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); - - if (!rc || gwi.fatalParseError) - return NULL; - - selCols.push_back(SRCP(rc)); - } - - ORDER** order_item, **end; - - for (order_item = gc->get_order(), - end = order_item + gc->order_field(); order_item < end; - order_item++) - { - Item* ord_col = *(*order_item)->item; - - if (ord_col->type() == Item::INT_ITEM) + for (uint32_t i = 0; i < select_ctn; i++) { - Item_int* id = (Item_int*)ord_col; - - if (id->val_int() > (int)selCols.size()) - { - gwi.fatalParseError = true; - return NULL; - } - - rc = selCols[id->val_int() - 1]->clone(); - rc->orderPos(id->val_int() - 1); - } - else - { - rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); + rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); if (!rc || gwi.fatalParseError) { + if (ac) + delete ac; return NULL; } + + selCols.push_back(SRCP(rc)); } - // 10.2 TODO: direction is now a tri-state flag - rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? 
true : false); - orderCols.push_back(SRCP(rc)); - } + ORDER** order_item, **end; - rowCol->columnVec(selCols); - (dynamic_cast(ac))->orderCols(orderCols); - parm.reset(rowCol); - - if (gc->str_separator()) - { - string separator; - separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); - (dynamic_cast(ac))->separator(separator); - } - } - else - { - for (uint32_t i = 0; i < isp->argument_count(); i++) - { - Item* sfitemp = sfitempp[i]; - Item::Type sfitype = sfitemp->type(); - - switch (sfitype) + for (order_item = gc->get_order(), + end = order_item + gc->order_field(); order_item < end; + order_item++) { - case Item::FIELD_ITEM: - { - Item_field* ifp = reinterpret_cast(sfitemp); - SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + Item* ord_col = *(*order_item)->item; - if (!sc) + if (ord_col->type() == Item::INT_ITEM) + { + Item_int* id = (Item_int*)ord_col; + + if (id->val_int() > (int)selCols.size()) { gwi.fatalParseError = true; - break; + if (ac) + delete ac; + return NULL; } - parm.reset(sc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); - TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); - gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); - break; + rc = selCols[id->val_int() - 1]->clone(); + rc->orderPos(id->val_int() - 1); } - - case Item::INT_ITEM: - case Item::STRING_ITEM: - case Item::REAL_ITEM: - case Item::DECIMAL_ITEM: + else { - // treat as count(*) - if (ac->aggOp() == AggregateColumn::COUNT) - ac->aggOp(AggregateColumn::COUNT_ASTERISK); + rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::NULL_ITEM: - { - //ac->aggOp(AggregateColumn::COUNT); - parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); - //ac->functionParms(parm); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::FUNC_ITEM: - { - Item_func* ifp = (Item_func*)sfitemp; - ReturnedColumn* rc = 0; - - // check count(1+1) case - vector tmpVec; - uint16_t parseInfo = 0; - parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); - - if (parseInfo & SUB_BIT) + if (!rc || gwi.fatalParseError) { - gwi.fatalParseError = true; - break; - } - else if (!gwi.fatalParseError && - !(parseInfo & AGG_BIT) && - !(parseInfo & AF_BIT) && - tmpVec.size() == 0) - { - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - FunctionColumn* fc = dynamic_cast(rc); - - if ((fc && fc->functionParms().empty()) || !fc) - { - //ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (dynamic_cast(rc)) - { - //@bug5229. handle constant function on aggregate argument - ac->constCol(SRCP(rc)); - break; - } - } - } - - // MySQL carelessly allows correlated aggregate function on the WHERE clause. - // Here is the work around to deal with that inconsistence. 
- // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; - ClauseType clauseType = gwi.clauseType; - - if (gwi.clauseType == WHERE) - gwi.clauseType = HAVING; - - // @bug 3603. for cases like max(rand()). try to build function first. - if (!rc) - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - - parm.reset(rc); - gwi.clauseType = clauseType; - - if (gwi.fatalParseError) - break; - - //ac->functionParms(parm); - break; - } - - case Item::REF_ITEM: - { - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (rc) - { - parm.reset(rc); - //ac->functionParms(parm); - break; + if (ac) + delete ac; + return NULL; } } - default: - { - gwi.fatalParseError = true; - //gwi.parseErrorText = "Non-supported Item in Aggregate function"; - } + // 10.2 TODO: direction is now a tri-state flag + rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); + orderCols.push_back(SRCP(rc)); } - if (gwi.fatalParseError) + rowCol->columnVec(selCols); + (dynamic_cast(ac))->orderCols(orderCols); + parm.reset(rowCol); + + if (gc->str_separator()) { - if (gwi.parseErrorText.empty()) - { - Message::Args args; - - if (item->name) - args.add(item->name); - else - args.add(""); - - gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); - } - - return NULL; + string separator; + separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); + (dynamic_cast(ac))->separator(separator); } } - } - - if (parm) - { - ac->functionParms(parm); - - if (isp->sum_func() == Item_sum::AVG_FUNC || - isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: 
- case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::DECIMAL; - ct.colWidth = 8; - ct.scale += 4; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; -#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::COUNT_FUNC || - isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = parm->resultType().scale; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_FUNC || - isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - ct.colDataType = CalpontSystemCatalog::BIGINT; - - // no break, let fall through - - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - ct.colWidth = 8; - break; - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::UBIGINT; - ct.colWidth = 8; - break; - -#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; 
-#endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::STD_FUNC || - isp->sum_func() == Item_sum::VARIANCE_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - ct.scale = 0; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = 0; - ct.precision = -16; // borrowed to indicate skip null value check on connector - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - //Item_func_group_concat* gc = (Item_func_group_concat*)isp; - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::VARCHAR; - ct.colWidth = isp->max_length; - ct.precision = 0; - ac->resultType(ct); - } else { - ac->resultType(parm->resultType()); + for (uint32_t i = 0; i < isp->argument_count(); i++) + { + Item* sfitemp = sfitempp[i]; + Item::Type sfitype = sfitemp->type(); + + switch (sfitype) + { + case Item::FIELD_ITEM: + { + Item_field* ifp = reinterpret_cast(sfitemp); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + + if (!sc) + { + gwi.fatalParseError = true; + break; + } + + parm.reset(sc); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); + TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); + gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); + break; + } + + case Item::INT_ITEM: + case Item::STRING_ITEM: + case Item::REAL_ITEM: + case Item::DECIMAL_ITEM: + { + // treat as count(*) + if (ac->aggOp() == AggregateColumn::COUNT) + ac->aggOp(AggregateColumn::COUNT_ASTERISK); + + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::NULL_ITEM: + { + parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::FUNC_ITEM: + { + Item_func* ifp = (Item_func*)sfitemp; + ReturnedColumn* rc = 0; + + // check count(1+1) case + vector tmpVec; + uint16_t parseInfo = 0; + parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); + + if (parseInfo & SUB_BIT) + { + gwi.fatalParseError = true; + break; + } + else if (!gwi.fatalParseError && + !(parseInfo & AGG_BIT) && + !(parseInfo & AF_BIT) && + tmpVec.size() == 0) + { + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + FunctionColumn* fc = dynamic_cast(rc); + + if ((fc && fc->functionParms().empty()) || !fc) + { + //ac->aggOp(AggregateColumn::COUNT_ASTERISK); + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (dynamic_cast(rc)) + { + //@bug5229. handle constant function on aggregate argument + ac->constCol(SRCP(rc)); + break; + } + } + } + + // MySQL carelessly allows correlated aggregate function on the WHERE clause. + // Here is the work around to deal with that inconsistence. + // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; + ClauseType clauseType = gwi.clauseType; + + if (gwi.clauseType == WHERE) + gwi.clauseType = HAVING; + + // @bug 3603. for cases like max(rand()). try to build function first. 
+ if (!rc) + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + + parm.reset(rc); + gwi.clauseType = clauseType; + + if (gwi.fatalParseError) + break; + + break; + } + + case Item::REF_ITEM: + { + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (rc) + { + parm.reset(rc); + break; + } + } + + default: + { + gwi.fatalParseError = true; + //gwi.parseErrorText = "Non-supported Item in Aggregate function"; + } + } + + if (gwi.fatalParseError) + { + if (gwi.parseErrorText.empty()) + { + Message::Args args; + + if (item->name) + args.add(item->name); + else + args.add(""); + + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); + } + + if (ac) + delete ac; + return NULL; + } + if (parm) + { + // MCOL-1201 multi-argument aggregate + ac->aggParms().push_back(parm); + } + } } - } - else - { - ac->resultType(colType_MysqlToIDB(isp)); - } - // adjust decimal result type according to internalDecimalScale - if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) - { - CalpontSystemCatalog::ColType ct = ac->resultType(); - ct.scale = gwi.internalDecimalScale; - ac->resultType(ct); - } - - // check for same aggregate on the select list - ac->expressionId(ci->expressionId++); - - if (gwi.clauseType != SELECT) - { - for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + // Get result type + // Modified for MCOL-1201 multi-argument aggregate + if (ac->aggParms().size() > 0) { - if (*ac == gwi.returnedCols[i].get()) - ac->expressionId(gwi.returnedCols[i]->expressionId()); - } - } + // These are all one parm functions, so we can safely + // use the first parm for result type. + parm = ac->aggParms()[0]; + if (isp->sum_func() == Item_sum::AVG_FUNC || + isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); - // @bug5977 @note Temporary fix to avoid mysqld crash. 
The permanent fix will - // be applied in ExeMgr. When the ExeMgr fix is available, this checking - // will be taken out. - if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::DECIMAL; + ct.colWidth = 8; + ct.scale += 4; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::COUNT_FUNC || + isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = parm->resultType().scale; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_FUNC || + isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + ct.colDataType = CalpontSystemCatalog::BIGINT; + + // no break, let fall through + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + ct.colWidth = 8; + break; + + case CalpontSystemCatalog::UTINYINT: + case 
CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::UBIGINT; + ct.colWidth = 8; + break; + + #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; + #endif + + default: + break; + } + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::STD_FUNC || + isp->sum_func() == Item_sum::VARIANCE_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + ct.scale = 0; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = 0; + ct.precision = -16; // borrowed to indicate skip null value check on connector + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + //Item_func_group_concat* gc = (Item_func_group_concat*)isp; + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::VARCHAR; + ct.colWidth = isp->max_length; + ct.precision = 0; + ac->resultType(ct); + } + else + { + // UDAF result type will be set below. 
+ ac->resultType(parm->resultType()); + } + } + else + { + ac->resultType(colType_MysqlToIDB(isp)); + } + + // adjust decimal result type according to internalDecimalScale + if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) + { + CalpontSystemCatalog::ColType ct = ac->resultType(); + ct.scale = gwi.internalDecimalScale; + ac->resultType(ct); + } + + // check for same aggregate on the select list + ac->expressionId(ci->expressionId++); + + if (gwi.clauseType != SELECT) + { + for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + { + if (*ac == gwi.returnedCols[i].get()) + ac->expressionId(gwi.returnedCols[i]->expressionId()); + } + } + + // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will + // be applied in ExeMgr. When the ExeMgr fix is available, this checking + // will be taken out. + if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "No project column found for aggregate function"; + if (ac) + delete ac; + return NULL; + } + else if (ac->constCol()) + { + gwi.count_asterisk_list.push_back(ac); + } + + // For UDAF, populate the context and call the UDAF init() function. + // The return type is (should be) set in context by init(). + if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + { + UDAFColumn* udafc = dynamic_cast(ac); + + if (udafc) + { + mcsv1Context& context = udafc->getContext(); + context.setName(isp->func_name()); + + // Set up the return type defaults for the call to init() + context.setResultType(udafc->resultType().colDataType); + context.setColWidth(udafc->resultType().colWidth); + context.setScale(udafc->resultType().scale); + context.setPrecision(udafc->resultType().precision); + + context.setParamCount(udafc->aggParms().size()); + ColumnDatum colType; + ColumnDatum colTypes[udafc->aggParms().size()]; + // Build the column type vector. 
+ // Modified for MCOL-1201 multi-argument aggregate + for (uint32_t i = 0; i < udafc->aggParms().size(); ++i) + { + const execplan::CalpontSystemCatalog::ColType& resultType + = udafc->aggParms()[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; + } + + // Call the user supplied init() + mcsv1sdk::mcsv1_UDAF* udaf = context.getFunction(); + if (!udaf) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "Aggregate Function " + context.getName() + " doesn't exist in the ColumnStore engine"; + if (ac) + delete ac; + return NULL; + } + if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR) + { + gwi.fatalParseError = true; + gwi.parseErrorText = udafc->getContext().getErrorMessage(); + if (ac) + delete ac; + return NULL; + } + + // UDAF_OVER_REQUIRED means that this function is for Window + // Function only. Reject it here in aggregate land. + if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, + context.getName()); + if (ac) + delete ac; + return NULL; + } + + // Set the return type as set in init() + CalpontSystemCatalog::ColType ct; + ct.colDataType = context.getResultType(); + ct.colWidth = context.getColWidth(); + ct.scale = context.getScale(); + ct.precision = context.getPrecision(); + udafc->resultType(ct); + } + } + + } + catch (std::logic_error e) { gwi.fatalParseError = true; - gwi.parseErrorText = "No project column found for aggregate function"; + gwi.parseErrorText = "error building Aggregate Function: "; + gwi.parseErrorText += e.what(); + if (ac) + delete ac; return NULL; } - else if (ac->constCol()) + catch (...) 
{ - gwi.count_asterisk_list.push_back(ac); + gwi.fatalParseError = true; + gwi.parseErrorText = "error building Aggregate Function: Unspecified exception"; + if (ac) + delete ac; + return NULL; } - - // For UDAF, populate the context and call the UDAF init() function. - if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) - { - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) - { - mcsv1Context& context = udafc->getContext(); - context.setName(isp->func_name()); - - // Set up the return type defaults for the call to init() - context.setResultType(udafc->resultType().colDataType); - context.setColWidth(udafc->resultType().colWidth); - context.setScale(udafc->resultType().scale); - context.setPrecision(udafc->resultType().precision); - - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - - // Build the column type vector. For now, there is only one - colTypes.push_back(make_pair(udafc->functionParms()->alias(), udafc->functionParms()->resultType().colDataType)); - - // Call the user supplied init() - if (context.getFunction()->init(&context, colTypes) == mcsv1_UDAF::ERROR) - { - gwi.fatalParseError = true; - gwi.parseErrorText = udafc->getContext().getErrorMessage(); - return NULL; - } - - if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) - { - gwi.fatalParseError = true; - gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, - context.getName()); - return NULL; - } - - // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; - ct.colDataType = context.getResultType(); - ct.colWidth = context.getColWidth(); - ct.scale = context.getScale(); - ct.precision = context.getPrecision(); - udafc->resultType(ct); - } - } - return ac; } @@ -7839,7 +7909,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } 
std::vector::iterator funciter; @@ -9949,7 +10019,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->functionParms(minSc); + (*coliter)->aggParms().push_back(minSc); } std::vector::iterator funciter; diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 1ee343e90..b39da4ea2 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -781,8 +781,11 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h //double double_val = *(double*)(&value); //f2->store(double_val); - if (f2->decimals() < (uint32_t)row.getScale(s)) - f2->dec = (uint32_t)row.getScale(s); + if ((f2->decimals() == DECIMAL_NOT_SPECIFIED && row.getScale(s) > 0) + || f2->decimals() < row.getScale(s)) + { + f2->dec = row.getScale(s); + } f2->store(dl); @@ -5275,8 +5278,6 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE execplan::CalpontSelectExecutionPlan::ColumnMap::iterator colMapIter; execplan::CalpontSelectExecutionPlan::ColumnMap::iterator condColMapIter; execplan::ParseTree* ptIt; - execplan::ReturnedColumn* rcIt; - for (TABLE_LIST* tl = gi.groupByTables; tl; tl = tl->next_local) { mapiter = ci->tableMap.find(tl->table); diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 4b648cb15..8d68a6260 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -340,6 +340,7 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n ac->distinct(item_sum->has_with_distinct()); Window_spec* win_spec = wf->window_spec; SRCP srcp; + CalpontSystemCatalog::ColType ct; // For return type // arguments vector funcParms; @@ -370,18 +371,25 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n context.setColWidth(rt.colWidth); context.setScale(rt.scale); context.setPrecision(rt.precision); + 
context.setParamCount(funcParms.size()); + + mcsv1sdk::ColumnDatum colType; + mcsv1sdk::ColumnDatum colTypes[funcParms.size()]; // Turn on the Analytic flag so the function is aware it is being called // as a Window Function. context.setContextFlag(CONTEXT_IS_ANALYTIC); - COL_TYPES colTypes; - execplan::CalpontSelectExecutionPlan::ColumnMap::iterator cmIter; - // Build the column type vector. + // Modified for MCOL-1201 multi-argument aggregate for (size_t i = 0; i < funcParms.size(); ++i) { - colTypes.push_back(make_pair(funcParms[i]->alias(), funcParms[i]->resultType().colDataType)); + const execplan::CalpontSystemCatalog::ColType& resultType + = funcParms[i]->resultType(); + colType.dataType = resultType.colDataType; + colType.precision = resultType.precision; + colType.scale = resultType.scale; + colTypes[i] = colType; } // Call the user supplied init() @@ -401,7 +409,6 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n } // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; ct.colDataType = context.getResultType(); ct.colWidth = context.getColWidth(); ct.scale = context.getScale(); @@ -419,10 +426,10 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n { case Item_sum::UDF_SUM_FUNC: { - uint64_t bIgnoreNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)); - char sIgnoreNulls[18]; - sprintf(sIgnoreNulls, "%lu", bIgnoreNulls); - srcp.reset(new ConstantColumn(sIgnoreNulls, (uint64_t)bIgnoreNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 1 => RESPECT + uint64_t bRespectNulls = (ac->getUDAFContext().getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) ? 0 : 1; + char sRespectNulls[18]; + sprintf(sRespectNulls, "%lu", bRespectNulls); + srcp.reset(new ConstantColumn(sRespectNulls, (uint64_t)bRespectNulls, ConstantColumn::NUM)); // IGNORE/RESPECT NULLS. 
1 => RESPECT funcParms.push_back(srcp); break; } @@ -881,11 +888,13 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n return NULL; } - ac->resultType(colType_MysqlToIDB(item_sum)); - - // bug5736. Make the result type double for some window functions when - // infinidb_double_for_decimal_math is set. - ac->adjustResultType(); + if (item_sum->sum_func() != Item_sum::UDF_SUM_FUNC) + { + ac->resultType(colType_MysqlToIDB(item_sum)); + // bug5736. Make the result type double for some window functions when + // infinidb_double_for_decimal_math is set. + ac->adjustResultType(); + } ac->expressionId(ci->expressionId++); diff --git a/utils/common/any.hpp b/utils/common/any.hpp index be0ca679b..5408c5c87 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -9,123 +9,142 @@ * http://www.boost.org/LICENSE_1_0.txt */ +#include #include namespace static_any { namespace anyimpl { + struct bad_any_cast + { + }; - struct bad_any_cast - { - }; + struct empty_any + { + }; - struct empty_any - { - }; + struct base_any_policy + { + virtual void static_delete(void** x) = 0; + virtual void copy_from_value(void const* src, void** dest) = 0; + virtual void clone(void* const* src, void** dest) = 0; + virtual void move(void* const* src, void** dest) = 0; + virtual void* get_value(void** src) = 0; + virtual size_t get_size() = 0; + }; - struct base_any_policy - { - virtual void static_delete(void** x) = 0; - virtual void copy_from_value(void const* src, void** dest) = 0; - virtual void clone(void* const* src, void** dest) = 0; - virtual void move(void* const* src, void** dest) = 0; - virtual void* get_value(void** src) = 0; - virtual size_t get_size() = 0; - }; + template + struct typed_base_any_policy : base_any_policy + { + virtual size_t get_size() + { + return sizeof(T); + } + }; - template - struct typed_base_any_policy : base_any_policy - { - virtual size_t get_size() { return sizeof(T); } - }; + template + struct small_any_policy : 
typed_base_any_policy + { + virtual void static_delete(void** x) + { + } + virtual void copy_from_value(void const* src, void** dest) + { + new(dest) T(*reinterpret_cast(src)); + } + virtual void clone(void* const* src, void** dest) + { + *dest = *src; + } + virtual void move(void* const* src, void** dest) + { + *dest = *src; + } + virtual void* get_value(void** src) + { + return reinterpret_cast(src); + } + }; - template - struct small_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) { } - virtual void copy_from_value(void const* src, void** dest) - { new(dest) T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) { *dest = *src; } - virtual void move(void* const* src, void** dest) { *dest = *src; } - virtual void* get_value(void** src) { return reinterpret_cast(src); } - }; - - template - struct big_any_policy : typed_base_any_policy - { - virtual void static_delete(void** x) + template + struct big_any_policy : typed_base_any_policy + { + virtual void static_delete(void** x) { if (*x) - delete(*reinterpret_cast(x)); + delete(*reinterpret_cast(x)); *x = NULL; } - virtual void copy_from_value(void const* src, void** dest) + virtual void copy_from_value(void const* src, void** dest) { - *dest = new T(*reinterpret_cast(src)); + *dest = new T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) + virtual void clone(void* const* src, void** dest) { - *dest = new T(**reinterpret_cast(src)); + *dest = new T(**reinterpret_cast(src)); } - virtual void move(void* const* src, void** dest) + virtual void move(void* const* src, void** dest) { - (*reinterpret_cast(dest))->~T(); - **reinterpret_cast(dest) = **reinterpret_cast(src); + (*reinterpret_cast(dest))->~T(); + **reinterpret_cast(dest) = **reinterpret_cast(src); } - virtual void* get_value(void** src) { return *src; } - }; + virtual void* get_value(void** src) + { + return *src; + } + }; - template - struct choose_policy - { - typedef 
big_any_policy type; - }; + template + struct choose_policy + { + typedef big_any_policy type; + }; - template - struct choose_policy - { - typedef small_any_policy type; - }; + template + struct choose_policy + { + typedef small_any_policy type; + }; - struct any; + struct any; - /// Choosing the policy for an any type is illegal, but should never happen. - /// This is designed to throw a compiler error. - template<> - struct choose_policy - { - typedef void type; - }; + /// Choosing the policy for an any type is illegal, but should never happen. + /// This is designed to throw a compiler error. + template<> + struct choose_policy + { + typedef void type; + }; - /// Specializations for small types. - #define SMALL_POLICY(TYPE) template<> struct \ - choose_policy { typedef small_any_policy type; }; + /// Specializations for small types. +#define SMALL_POLICY(TYPE) template<> struct \ + choose_policy { typedef small_any_policy type; }; - SMALL_POLICY(char); - SMALL_POLICY(signed char); - SMALL_POLICY(unsigned char); - SMALL_POLICY(signed short); - SMALL_POLICY(unsigned short); - SMALL_POLICY(signed int); - SMALL_POLICY(unsigned int); - SMALL_POLICY(signed long); - SMALL_POLICY(unsigned long); - SMALL_POLICY(signed long long); - SMALL_POLICY(unsigned long long); - SMALL_POLICY(float); - SMALL_POLICY(double); - SMALL_POLICY(bool); + SMALL_POLICY(char); + SMALL_POLICY(signed char); + SMALL_POLICY(unsigned char); + SMALL_POLICY(signed short); + SMALL_POLICY(unsigned short); + SMALL_POLICY(signed int); + SMALL_POLICY(unsigned int); + SMALL_POLICY(signed long); + SMALL_POLICY(unsigned long); + SMALL_POLICY(signed long long); + SMALL_POLICY(unsigned long long); + SMALL_POLICY(float); + SMALL_POLICY(double); + SMALL_POLICY(bool); - #undef SMALL_POLICY +#undef SMALL_POLICY - /// This function will return a different policy for each type. 
- template - base_any_policy* get_policy() - { - static typename choose_policy::type policy; - return &policy; - }; + /// This function will return a different policy for each type. + template + base_any_policy* get_policy() + { + static typename choose_policy::type policy; + return &policy; + }; } class any @@ -139,37 +158,40 @@ public: /// Initializing constructor. template any(const T& x) - : policy(anyimpl::get_policy()), object(NULL) + : policy(anyimpl::get_policy()), object(NULL) { assign(x); } /// Empty constructor. any() - : policy(anyimpl::get_policy()), object(NULL) - { } + : policy(anyimpl::get_policy()), object(NULL) + { + } /// Special initializing constructor for string literals. any(const char* x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Copy constructor. any(const any& x) - : policy(anyimpl::get_policy()), object(NULL) - { + : policy(anyimpl::get_policy()), object(NULL) + { assign(x); } /// Destructor. - ~any() { + ~any() + { policy->static_delete(&object); } /// Assignment function from another any. - any& assign(const any& x) { + any& assign(const any& x) + { reset(); policy = x.policy; policy->clone(&x.object, &object); @@ -178,7 +200,8 @@ public: /// Assignment function. template - any& assign(const T& x) { + any& assign(const T& x) + { reset(); policy = anyimpl::get_policy(); policy->copy_from_value(&x, &object); @@ -197,8 +220,42 @@ public: return assign(x); } + /// Less than operator for sorting + bool operator<(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) < 0 ? 
1 : 0; + } + return 0; + } + + /// equal operator + bool operator==(const any& x) const + { + if (policy == x.policy) + { + void* p1 = const_cast(object); + void* p2 = const_cast(x.object); + return memcmp(policy->get_value(&p1), + x.policy->get_value(&p2), + policy->get_size()) == 0 ? 1 : 0; + } + return 0; + } + /// Utility functions - any& swap(any& x) { + uint8_t getHash() const + { + void* p1 = const_cast(object); + return *(uint64_t*)policy->get_value(&p1) % 4048; + } + any& swap(any& x) + { std::swap(policy, x.policy); std::swap(object, x.object); return *this; @@ -206,27 +263,32 @@ public: /// Cast operator. You can only cast to the original type. template - T& cast() { - if (policy != anyimpl::get_policy()) + T& cast() + { + if (policy != anyimpl::get_policy()) throw anyimpl::bad_any_cast(); T* r = reinterpret_cast(policy->get_value(&object)); return *r; } /// Returns true if the any contains no value. - bool empty() const { + bool empty() const + { return policy == anyimpl::get_policy(); } /// Frees any allocated memory, and sets the value to NULL. - void reset() { + void reset() + { policy->static_delete(&object); policy = anyimpl::get_policy(); } /// Returns true if the two types are the same. 
- bool compatible(const any& x) const { + bool compatible(const any& x) const + { return policy == x.policy; } }; + } diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 8d110cfc8..c1f5bbd63 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -215,6 +215,22 @@ inline string getStringNullValue() namespace rowgroup { +const std::string typeStr(""); +const static_any::any& RowAggregation::charTypeId((char)1); +const static_any::any& RowAggregation::scharTypeId((signed char)1); +const static_any::any& RowAggregation::shortTypeId((short)1); +const static_any::any& RowAggregation::intTypeId((int)1); +const static_any::any& RowAggregation::longTypeId((long)1); +const static_any::any& RowAggregation::llTypeId((long long)1); +const static_any::any& RowAggregation::ucharTypeId((unsigned char)1); +const static_any::any& RowAggregation::ushortTypeId((unsigned short)1); +const static_any::any& RowAggregation::uintTypeId((unsigned int)1); +const static_any::any& RowAggregation::ulongTypeId((unsigned long)1); +const static_any::any& RowAggregation::ullTypeId((unsigned long long)1); +const static_any::any& RowAggregation::floatTypeId((float)1); +const static_any::any& RowAggregation::doubleTypeId((double)1); +const static_any::any& RowAggregation::strTypeId(typeStr); + KeyStorage::KeyStorage(const RowGroup& keys, Row** tRow) : tmpRow(tRow), rg(keys) { RGData data(rg); @@ -691,7 +707,8 @@ RowAggregation::RowAggregation(const vector& rowAggGroupByCol RowAggregation::RowAggregation(const RowAggregation& rhs): fAggMapPtr(NULL), fRowGroupOut(NULL), fTotalRowCount(0), fMaxTotalRowCount(AGG_ROWGROUP_SIZE), - fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0) + fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0), + fRGContext(rhs.fRGContext) { //fGroupByCols.clear(); //fFunctionCols.clear(); @@ -756,7 +773,6 @@ void RowAggregation::addRowGroup(const RowGroup* pRows, vector& in { // this function is 
for threaded aggregation, which is for group by and distinct. // if (countSpecial(pRows)) - Row rowIn; pRows->initRow(&rowIn); @@ -790,7 +806,7 @@ void RowAggregation::setJoinRowGroups(vector* pSmallSideRG, RowGroup* } //------------------------------------------------------------------------------ -// For UDAF, we need to sometimes start a new context. +// For UDAF, we need to sometimes start a new fRGContext. // // This will be called any number of times by each of the batchprimitiveprocessor // threads on the PM and by multple threads on the UM. It must remain @@ -801,29 +817,29 @@ void RowAggregation::resetUDAF(uint64_t funcColID) // Get the UDAF class pointer and store in the row definition object. RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColID].get()); - // resetUDAF needs to be re-entrant. Since we're modifying the context object - // by creating a new userData, we need a local copy. The copy constructor - // doesn't copy userData. - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + // RowAggregation and it's functions need to be re-entrant which means + // each instance (thread) needs its own copy of the context object. + // Note: operator=() doesn't copy userData. + fRGContext = rowUDAF->fUDAFContext; // Call the user reset for the group userData. Since, at this point, // context's userData will be NULL, reset will generate a new one. 
mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->reset(&rgContext); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } fRow.setUserDataStore(fRowGroupOut->getRGData()->getUserDataStore()); - fRow.setUserData(rgContext, - rgContext.getUserDataSP(), - rgContext.getUserDataSize(), + fRow.setUserData(fRGContext, + fRGContext.getUserDataSP(), + fRGContext.getUserDataSize(), rowUDAF->fAuxColumnIndex); - rgContext.setUserData(NULL); // Prevents calling deleteUserData on the context. + fRGContext.setUserData(NULL); // Prevents calling deleteUserData on the fRGContext. } //------------------------------------------------------------------------------ @@ -873,7 +889,6 @@ void RowAggregation::initialize() } } - // Save the RowGroup data pointer fResultDataVec.push_back(fRowGroupOut->getRGData()); @@ -1658,10 +1673,11 @@ void RowAggregation::updateEntry(const Row& rowIn) { for (uint64_t i = 0; i < fFunctionCols.size(); i++) { - int64_t colIn = fFunctionCols[i]->fInputColumnIndex; - int64_t colOut = fFunctionCols[i]->fOutputColumnIndex; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[i]; + int64_t colIn = pFunctionCol->fInputColumnIndex; + int64_t colOut = pFunctionCol->fOutputColumnIndex; - switch (fFunctionCols[i]->fAggFunction) + switch (pFunctionCol->fAggFunction) { case ROWAGG_COUNT_COL_NAME: @@ -1675,7 +1691,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_MIN: case ROWAGG_MAX: case ROWAGG_SUM: - doMinMaxSum(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction); + doMinMaxSum(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; case ROWAGG_AVG: @@ -1692,7 +1708,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_BIT_OR: case ROWAGG_BIT_XOR: { - doBitOp(rowIn, 
colIn, colOut, fFunctionCols[i]->fAggFunction); + doBitOp(rowIn, colIn, colOut, pFunctionCol->fAggFunction); break; } @@ -1707,11 +1723,11 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); + RowUDAFFunctionCol* rowUDAF = dynamic_cast(pFunctionCol.get()); if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF); + doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF, i); } else { @@ -1725,7 +1741,7 @@ void RowAggregation::updateEntry(const Row& rowIn) { std::ostringstream errmsg; errmsg << "RowAggregation: function (id = " << - (uint64_t) fFunctionCols[i]->fAggFunction << ") is not supported."; + (uint64_t) pFunctionCol->fAggFunction << ") is not supported."; cerr << errmsg.str() << endl; throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); break; @@ -1997,131 +2013,142 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu } void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { - std::vector valsIn; - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupIn.getColTypes()[colIn]; - std::vector dataFlags; + int32_t paramCount = fRGContext.getParameterCount(); + // The vector of parameters to be sent to the UDAF + mcsv1sdk::ColumnDatum valsIn[paramCount]; + uint32_t dataFlags[paramCount]; - // Get the context for this rowGroup. Make a copy so we're thread safe. 
- mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); - - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - - if (isNull(&fRowGroupIn, rowIn, colIn) == true) + execplan::CalpontSystemCatalog::ColDataType colDataType; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + if (isNull(&fRowGroupIn, rowIn, colIn) == true) { - return; + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + { + return; + } + dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; + } + + colDataType = fRowGroupIn.getColTypes()[colIn]; + if (!fRGContext.isParamNull(i)) + { + switch (colDataType) + { + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + break; + } + + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UBIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + { + datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; + datum.columnData = rowIn.getDoubleField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + { + 
datum.dataType = execplan::CalpontSystemCatalog::FLOAT; + datum.columnData = rowIn.getFloatField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + datum.columnData = rowIn.getUintField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::TIME: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + datum.columnData = rowIn.getIntField(colIn); + break; + } + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + { + datum.dataType = colDataType; + datum.columnData = rowIn.getStringField(colIn); + break; + } + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation " << fRGContext.getName() << + ": No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } } - flag |= mcsv1sdk::PARAM_IS_NULL; - } - - flags.push_back(flag); - rgContext.setDataFlags(&flags); - - mcsv1sdk::ColumnDatum datum; - - if (!rgContext.isParamNull(0)) - { - switch (colDataType) + // MCOL-1201: If there are multiple parameters, the next fFunctionCols + // will have the column used. By incrementing the funcColsIdx (passed by + // ref, we also increment the caller's index. 
+ if (fFunctionCols.size() > funcColsIdx + 1 + && fFunctionCols[funcColsIdx+1]->fAggFunction == ROWAGG_MULTI_PARM) { - case execplan::CalpontSystemCatalog::TINYINT: - case execplan::CalpontSystemCatalog::SMALLINT: - case execplan::CalpontSystemCatalog::MEDINT: - case execplan::CalpontSystemCatalog::INT: - case execplan::CalpontSystemCatalog::BIGINT: - case execplan::CalpontSystemCatalog::DECIMAL: - case execplan::CalpontSystemCatalog::UDECIMAL: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - datum.scale = fRowGroupIn.getScale()[colIn]; - datum.precision = fRowGroupIn.getPrecision()[colIn]; - break; - } - - case execplan::CalpontSystemCatalog::UTINYINT: - case execplan::CalpontSystemCatalog::USMALLINT: - case execplan::CalpontSystemCatalog::UMEDINT: - case execplan::CalpontSystemCatalog::UINT: - case execplan::CalpontSystemCatalog::UBIGINT: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DOUBLE: - case execplan::CalpontSystemCatalog::UDOUBLE: - { - datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; - datum.columnData = rowIn.getDoubleField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::FLOAT: - case execplan::CalpontSystemCatalog::UFLOAT: - { - datum.dataType = execplan::CalpontSystemCatalog::FLOAT; - datum.columnData = rowIn.getFloatField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::DATE: - case execplan::CalpontSystemCatalog::DATETIME: - { - datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::TIME: - { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - break; - } - - case execplan::CalpontSystemCatalog::CHAR: - case execplan::CalpontSystemCatalog::VARCHAR: - case 
execplan::CalpontSystemCatalog::TEXT: - case execplan::CalpontSystemCatalog::VARBINARY: - case execplan::CalpontSystemCatalog::CLOB: - case execplan::CalpontSystemCatalog::BLOB: - { - datum.dataType = colDataType; - datum.columnData = rowIn.getStringField(colIn); - break; - } - - default: - { - std::ostringstream errmsg; - errmsg << "RowAggregation " << rgContext.getName() << - ": No logic for data type: " << colDataType; - throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); - break; - } + ++funcColsIdx; + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; + colIn = pFunctionCol->fInputColumnIndex; + colOut = pFunctionCol->fOutputColumnIndex; + } + else + { + break; } } - valsIn.push_back(datum); - // The intermediate values are stored in userData referenced by colAux. - rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setDataFlags(dataFlags); + fRGContext.setUserData(fRow.getUserData(colAux)); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->nextValue(&rgContext, valsIn); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -2218,6 +2245,7 @@ RowAggregationUM::RowAggregationUM(const RowAggregationUM& rhs) : fHasAvg(rhs.fHasAvg), fKeyOnHeap(rhs.fKeyOnHeap), fHasStatsFunc(rhs.fHasStatsFunc), + fHasUDAF(rhs.fHasUDAF), fExpression(rhs.fExpression), fTotalMemUsage(rhs.fTotalMemUsage), fRm(rhs.fRm), @@ -2419,7 +2447,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -2585,22 +2613,6 @@ void RowAggregationUM::calculateAvgColumns() // Sets the value 
from valOut into column colOut, performing any conversions. void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) { - static const static_any::any& charTypeId((char)1); - static const static_any::any& scharTypeId((signed char)1); - static const static_any::any& shortTypeId((short)1); - static const static_any::any& intTypeId((int)1); - static const static_any::any& longTypeId((long)1); - static const static_any::any& llTypeId((long long)1); - static const static_any::any& ucharTypeId((unsigned char)1); - static const static_any::any& ushortTypeId((unsigned short)1); - static const static_any::any& uintTypeId((unsigned int)1); - static const static_any::any& ulongTypeId((unsigned long)1); - static const static_any::any& ullTypeId((unsigned long long)1); - static const static_any::any& floatTypeId((float)1); - static const static_any::any& doubleTypeId((double)1); - static const std::string typeStr(""); - static const static_any::any& strTypeId(typeStr); - execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; if (valOut.empty()) @@ -2609,6 +2621,179 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) return; } + int64_t intOut = 0; + uint64_t uintOut = 0; + float floatOut = 0.0; + double doubleOut = 0.0; + ostringstream oss; + std::string strOut; + + bool bSetSuccess = false; + switch (colDataType) + { + case execplan::CalpontSystemCatalog::BIT: + case execplan::CalpontSystemCatalog::TINYINT: + if (valOut.compatible(charTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(scharTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<1>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + if (valOut.compatible(shortTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<2>(intOut, colOut); + bSetSuccess = true; 
+ } + break; + + case execplan::CalpontSystemCatalog::INT: + if (valOut.compatible(uintTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + else if (valOut.compatible(longTypeId)) + { + intOut = valOut.cast(); + bSetSuccess = true; + } + if (bSetSuccess) + { + fRow.setIntField<4>(intOut, colOut); + } + break; + + case execplan::CalpontSystemCatalog::BIGINT: + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + if (valOut.compatible(llTypeId)) + { + intOut = valOut.cast(); + fRow.setIntField<8>(intOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UTINYINT: + if (valOut.compatible(ucharTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<1>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + if (valOut.compatible(ushortTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<2>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UINT: + if (valOut.compatible(uintTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<4>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::UBIGINT: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + if (valOut.compatible(ulongTypeId)) + { + uintOut = valOut.cast(); + fRow.setUintField<8>(uintOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + if (valOut.compatible(floatTypeId)) + { + floatOut = valOut.cast(); + fRow.setFloatField(floatOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::DOUBLE: + case 
execplan::CalpontSystemCatalog::UDOUBLE: + if (valOut.compatible(doubleTypeId)) + { + doubleOut = valOut.cast(); + fRow.setDoubleField(doubleOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setStringField(strOut, colOut); + bSetSuccess = true; + } + break; + + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + if (valOut.compatible(strTypeId)) + { + std::string strOut = valOut.cast(); + fRow.setVarBinaryField(strOut, colOut); + bSetSuccess = true; + } + break; + + default: + { + std::ostringstream errmsg; + errmsg << "RowAggregation: No logic for data type: " << colDataType; + throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); + break; + } + } + if (!bSetSuccess) + { + SetUDAFAnyValue(valOut, colOut); + } +} + +void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut) +{ + execplan::CalpontSystemCatalog::ColDataType colDataType = fRowGroupOut->getColTypes()[colOut]; + // This may seem a bit convoluted. Users shouldn't return a type // that they didn't set in mcsv1_UDAF::init(), but this // handles whatever return type is given and casts @@ -2814,7 +2999,7 @@ void RowAggregationUM::calculateUDAFColumns() continue; rowUDAF = dynamic_cast(fFunctionCols[i].get()); - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); + fRGContext = rowUDAF->fUDAFContext; int64_t colOut = rowUDAF->fOutputColumnIndex; int64_t colAux = rowUDAF->fAuxColumnIndex; @@ -2826,26 +3011,26 @@ void RowAggregationUM::calculateUDAFColumns() fRowGroupOut->getRow(j, &fRow); // Turn the NULL flag off. We can't know NULL at this point - rgContext.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF evaluate function mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->evaluate(&rgContext, valOut); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::QueryDataExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); } - rgContext.setUserData(NULL); + fRGContext.setUserData(NULL); } } @@ -3116,54 +3301,60 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u { // For a NULL constant, call nextValue with NULL and then evaluate. bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. 
- rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } - +#if 0 + uint32_t dataFlags[fRGContext.getParameterCount()]; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + { + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + // Turn on NULL flags + dataFlags[i] = 0; + } +#endif // Turn the NULL and CONSTANT flags on. - std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_NULL | mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a dummy datum - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = execplan::CalpontSystemCatalog::BIGINT; datum.columnData = 0; - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), 
logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3460,30 +3651,28 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData case ROWAGG_UDAF: { bool bInterrupted = false; - mcsv1sdk::mcsv1Context context(((RowUDAFFunctionCol*)fFunctionCols[i].get())->fUDAFContext); - context.setInterrupted(bInterrupted); - context.createUserData(); + fRGContext.setInterrupted(bInterrupted); + fRGContext.createUserData(); mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - std::vector valsIn; + mcsv1sdk::ColumnDatum valsIn[1]; // Call a reset, then nextValue, then execute. This will evaluate // the UDAF for the constant. - rc = context.getFunction()->reset(&context); + rc = fRGContext.getFunction()->reset(&fRGContext); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Turn the CONSTANT flags on. 
- std::vector flags; - uint32_t flag = mcsv1sdk::PARAM_IS_CONSTANT; - flags.push_back(flag); - context.setDataFlags(&flags); + uint32_t flags[1]; + flags[0] = mcsv1sdk::PARAM_IS_CONSTANT; + fRGContext.setDataFlags(flags); // Create a datum item for sending to UDAF - mcsv1sdk::ColumnDatum datum; + mcsv1sdk::ColumnDatum& datum = valsIn[0]; datum.dataType = (CalpontSystemCatalog::ColDataType)colDataType; switch (colDataType) @@ -3567,27 +3756,27 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData break; } - valsIn.push_back(datum); - rc = context.getFunction()->nextValue(&context, valsIn); + rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } static_any::any valOut; - rc = context.getFunction()->evaluate(&context, valOut); + rc = fRGContext.getFunction()->evaluate(&fRGContext, valOut); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { - context.setInterrupted(true); - throw logging::QueryDataExcept(context.getErrorMessage(), logging::aggregateFuncErr); + fRGContext.setInterrupted(true); + throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } // Set the returned value into the output row SetUDAFValue(valOut, colOut); - context.setDataFlags(NULL); + fRGContext.setDataFlags(NULL); } break; @@ -3806,7 +3995,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { @@ -4011,45 +4200,43 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance 
//------------------------------------------------------------------------------ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF) + RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { static_any::any valOut; - mcsv1sdk::mcsv1Context rgContext(rowUDAF->fUDAFContext); // Get the user data boost::shared_ptr userData = rowIn.getUserData(colIn + 1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. - std::vector flags; - uint32_t flag = 0; + uint32_t flags[1]; + flags[0] = 0; if (!userData) { - if (rgContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) + if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { return; } // Turn on NULL flags - flag |= mcsv1sdk::PARAM_IS_NULL; + flags[0] |= mcsv1sdk::PARAM_IS_NULL; } - flags.push_back(flag); - rgContext.setDataFlags(&flags); + fRGContext.setDataFlags(flags); // The intermediate values are stored in colAux. 
- rgContext.setUserData(fRow.getUserData(colAux)); + fRGContext.setUserData(fRow.getUserData(colAux)); // Call the UDAF subEvaluate method mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = rgContext.getFunction()->subEvaluate(&rgContext, userData.get()); - rgContext.setUserData(NULL); + rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userData.get()); + fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { rowUDAF->bInterrupted = true; - throw logging::IDBExcept(rgContext.getErrorMessage(), logging::aggregateFuncErr); + throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } } @@ -4246,7 +4433,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) if (rowUDAF) { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF); + doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); } else { diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index b6294f193..282f354fc 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -110,6 +110,9 @@ enum RowAggFunctionType // User Defined Aggregate Function ROWAGG_UDAF, + // If an Aggregate has more than one parameter, this will be used for parameters after the first + ROWAGG_MULTI_PARM, + // internal function type to avoid duplicate the work // handling ROWAGG_COUNT_NO_OP, ROWAGG_DUP_FUNCT and ROWAGG_DUP_AVG is a little different // ROWAGG_COUNT_NO_OP : count done by AVG, no need to copy @@ -583,7 +586,7 @@ protected: virtual void doAvg(const Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); - virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); @@ -660,6 +663,25 @@ 
protected: //need access to rowgroup storage holding the rows to hash & ==. friend class AggHasher; friend class AggComparator; + + // We need a separate copy for each thread. + mcsv1sdk::mcsv1Context fRGContext; + + // These are handy for testing the actual type of static_any for UDAF + static const static_any::any& charTypeId; + static const static_any::any& scharTypeId; + static const static_any::any& shortTypeId; + static const static_any::any& intTypeId; + static const static_any::any& longTypeId; + static const static_any::any& llTypeId; + static const static_any::any& ucharTypeId; + static const static_any::any& ushortTypeId; + static const static_any::any& uintTypeId; + static const static_any::any& ulongTypeId; + static const static_any::any& ullTypeId; + static const static_any::any& floatTypeId; + static const static_any::any& doubleTypeId; + static const static_any::any& strTypeId; }; //------------------------------------------------------------------------------ @@ -783,6 +805,9 @@ protected: // Sets the value from valOut into column colOut, performing any conversions. void SetUDAFValue(static_any::any& valOut, int64_t colOut); + // If the datatype returned by evaluate isn't what we expect, convert. + void SetUDAFAnyValue(static_any::any& valOut, int64_t colOut); + // calculate the UDAF function all rows received. UM only function. 
void calculateUDAFColumns(); @@ -877,7 +902,7 @@ protected: void doStatistics(const Row&, int64_t, int64_t, int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); - void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF); + void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); bool countSpecial(const RowGroup* pRG) { return false; diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index e69ff4d88..01009e35a 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/udfsdk/allnull.cpp b/utils/udfsdk/allnull.cpp index b6b8d79da..247b9e28f 100644 --- a/utils/udfsdk/allnull.cpp +++ b/utils/udfsdk/allnull.cpp @@ -27,11 +27,11 @@ struct allnull_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode allnull::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { context->setUserDataSize(sizeof(allnull_data)); - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -52,8 +52,7 @@ mcsv1_UDAF::ReturnCode allnull::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode allnull::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { struct allnull_data* data = (struct allnull_data*)context->getUserData()->data; diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h index 86697b052..da17f5d6b 100644 
--- a/utils/udfsdk/allnull.h +++ b/utils/udfsdk/allnull.h @@ -103,7 +103,7 @@ public: * colTypes or wrong number of arguments. Else return * mcsv1_UDAF::SUCCESS. */ - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); /** * reset() @@ -138,7 +138,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() diff --git a/utils/udfsdk/avg_mode.cpp b/utils/udfsdk/avg_mode.cpp index f39b5e402..5429183d9 100644 --- a/utils/udfsdk/avg_mode.cpp +++ b/utils/udfsdk/avg_mode.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode avg_mode::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("avg_mode() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode avg_mode::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode avg_mode::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; @@ -187,8 +186,7 @@ mcsv1_UDAF::ReturnCode 
avg_mode::evaluate(mcsv1Context* context, static_any::any return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode avg_mode::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MODE_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/avg_mode.h b/utils/udfsdk/avg_mode.h index 4f3442005..5722c5fea 100644 --- a/utils/udfsdk/avg_mode.h +++ b/utils/udfsdk/avg_mode.h @@ -18,7 +18,7 @@ /*********************************************************************** * $Id$ * -* mcsv1_UDAF.h +* avg_mode.h ***********************************************************************/ /** @@ -50,8 +50,8 @@ * is also used to describe the interface that is used for * either. */ -#ifndef HEADER_mode -#define HEADER_mode +#ifndef HEADER_avg_mode +#define HEADER_avg_mode #include #include @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. 
*/ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index 349a642ec..ee08dcc07 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -36,6 +36,8 @@ UDAF_MAP UDAFMap::fm; #include "ssq.h" #include "median.h" #include "avg_mode.h" +#include "regr_avgx.h" +#include "avgx.h" UDAF_MAP& UDAFMap::getMap() { if (fm.size() > 0) @@ -52,6 +54,8 @@ UDAF_MAP& UDAFMap::getMap() fm["ssq"] = new ssq(); fm["median"] = new median(); fm["avg_mode"] = new avg_mode(); + fm["regr_avgx"] = new regr_avgx(); + fm["avgx"] = new avgx(); return fm; } @@ -115,8 +119,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const { // We don't test the per row data fields. They don't determine // if it's the same Context. - if (getName() != c.getName() - || fRunFlags != c.fRunFlags + if (getName() != c.getName() + ||fRunFlags != c.fRunFlags || fContextFlags != c.fContextFlags || fUserDataSize != c.fUserDataSize || fResultType != c.fResultType @@ -125,7 +129,8 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const || fStartFrame != c.fStartFrame || fEndFrame != c.fEndFrame || fStartConstant != c.fStartConstant - || fEndConstant != c.fEndConstant) + || fEndConstant != c.fEndConstant + || fParamCount != c.fParamCount) return false; return true; @@ -217,6 +222,7 @@ void mcsv1Context::serialize(messageqcpp::ByteStream& b) const b << (uint32_t)fEndFrame; b << fStartConstant; b << fEndConstant; + b << fParamCount; } void mcsv1Context::unserialize(messageqcpp::ByteStream& b) @@ -238,6 +244,7 @@ void mcsv1Context::unserialize(messageqcpp::ByteStream& b) fEndFrame = (WF_FRAME)frame; b >> fStartConstant; b >> fEndConstant; + b >> fParamCount; } void UserData::serialize(messageqcpp::ByteStream& bs) const diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h 
index d24852c28..df3f47649 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -77,6 +77,7 @@ #include "any.hpp" #include "calpontsystemcatalog.h" #include "wf_frame.h" +#include "my_decimal_limits.h" using namespace execplan; @@ -200,12 +201,8 @@ static uint64_t CONTEXT_IS_PM __attribute__ ((unused)) = 1 << 2; // Flags that describe the contents of a specific input parameter // These will be set in context->dataFlags for each method call by the framework. // User code shouldn't use these directly -static uint64_t PARAM_IS_NULL __attribute__ ((unused)) = 1; -static uint64_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; - -// shorthand for the list of columns in the call sent to init() -// first is the actual column name and second is the data type in Columnstore. -typedef std::vector >COL_TYPES; +static uint32_t PARAM_IS_NULL __attribute__ ((unused)) = 1; +static uint32_t PARAM_IS_CONSTANT __attribute__ ((unused)) = 1 << 1; // This is the context class that is passed to all API callbacks // The framework potentially sets data here for each invocation of @@ -269,7 +266,9 @@ public: EXPORT bool isPM(); // Parameter refinement description accessors - // valid in nextValue and dropValue + + // How many actual parameters were entered. + // valid in all calls size_t getParameterCount() const; // Determine if an input parameter is NULL @@ -298,6 +297,7 @@ public: // This only makes sense if the return type is decimal, but should be set // to (0, -1) for other types if the inout is decimal. // valid in init() + // Set the scale to DECIMAL_NOT_SPECIFIED if you want a floating decimal. 
EXPORT bool setScale(int32_t scale); EXPORT bool setPrecision(int32_t precision); @@ -372,7 +372,7 @@ private: int32_t fResultscale; // For scale, the number of digits to the right of the decimal int32_t fResultPrecision; // The max number of digits allowed in the decimal value std::string errorMsg; - std::vector* dataFlags; // one entry for each parameter + uint32_t* dataFlags; // an integer array wirh one entry for each parameter bool* bInterrupted; // Gets set to true by the Framework if something happens WF_FRAME fStartFrame; // Is set to default to start, then modified by the actual frame in the call WF_FRAME fEndFrame; // Is set to default to start, then modified by the actual frame in the call @@ -380,6 +380,7 @@ private: int32_t fEndConstant; // for end frame WF_PRECEEDIMG or WF_FOLLOWING std::string functionName; mcsv1sdk::mcsv1_UDAF* func; + int32_t fParamCount; public: // For use by the framework @@ -394,13 +395,14 @@ public: EXPORT void clearContextFlag(uint64_t flag); EXPORT uint64_t getContextFlags() const; EXPORT uint32_t getUserDataSize() const; - EXPORT std::vector& getDataFlags(); - EXPORT void setDataFlags(std::vector* flags); + EXPORT uint32_t* getDataFlags(); + EXPORT void setDataFlags(uint32_t* flags); EXPORT void setInterrupted(bool interrupted); EXPORT void setInterrupted(bool* interrupted); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction(); EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const; EXPORT boost::shared_ptr getUserDataSP(); + EXPORT void setParamCount(int32_t paramCount); }; // Since aggregate functions can operate on any data type, we use the following structure @@ -419,9 +421,10 @@ public: struct ColumnDatum { CalpontSystemCatalog::ColDataType dataType; // defined in calpontsystemcatalog.h - static_any::any columnData; + static_any::any columnData; // Not valid in init() uint32_t scale; // If dataType is a DECIMAL type uint32_t precision; // If dataType is a DECIMAL type + std::string alias; // Only filled in for init() ColumnDatum() : 
dataType(CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1) {}; }; @@ -466,7 +469,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes) = 0; + ColumnDatum* colTypes) = 0; /** * reset() @@ -501,8 +504,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn) = 0; + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn) = 0; /** * subEvaluate() @@ -579,8 +581,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() @@ -640,32 +641,32 @@ inline mcsv1Context::mcsv1Context() : fEndFrame(WF_CURRENT_ROW), fStartConstant(0), fEndConstant(0), - func(NULL) + func(NULL), + fParamCount(0) { } inline mcsv1Context::mcsv1Context(const mcsv1Context& rhs) : - fContextFlags(0), - fColWidth(0), - dataFlags(NULL), - bInterrupted(NULL), - func(NULL) + dataFlags(NULL) { copy(rhs); } inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs) { - fRunFlags = rhs.getRunFlags(); - fResultType = rhs.getResultType(); - fUserDataSize = rhs.getUserDataSize(); - fResultscale = rhs.getScale(); - fResultPrecision = rhs.getPrecision(); + fRunFlags = rhs.fRunFlags; + fContextFlags = rhs.fContextFlags; + fResultType = rhs.fResultType; + fUserDataSize = rhs.fUserDataSize; + fColWidth = rhs.fColWidth; + fResultscale = rhs.fResultscale; + fResultPrecision = rhs.fResultPrecision; rhs.getStartFrame(fStartFrame, fStartConstant); rhs.getEndFrame(fEndFrame, fEndConstant); - functionName = rhs.getName(); - bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference - func = rhs.func; + functionName = rhs.functionName; + bInterrupted = rhs.bInterrupted; // Multiple 
threads will use the same reference + func = rhs.func; + fParamCount = rhs.fParamCount; return *this; } @@ -675,11 +676,7 @@ inline mcsv1Context::~mcsv1Context() inline mcsv1Context& mcsv1Context::operator=(const mcsv1Context& rhs) { - fContextFlags = 0; - fColWidth = 0; dataFlags = NULL; - bInterrupted = NULL; - func = NULL; return copy(rhs); } @@ -753,16 +750,13 @@ inline bool mcsv1Context::isPM() inline size_t mcsv1Context::getParameterCount() const { - if (dataFlags) - return dataFlags->size(); - - return 0; + return fParamCount; } inline bool mcsv1Context::isParamNull(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_NULL; + return dataFlags[paramIdx] & PARAM_IS_NULL; return false; } @@ -770,7 +764,7 @@ inline bool mcsv1Context::isParamNull(int paramIdx) inline bool mcsv1Context::isParamConstant(int paramIdx) { if (dataFlags) - return (*dataFlags)[paramIdx] & PARAM_IS_CONSTANT; + return dataFlags[paramIdx] & PARAM_IS_CONSTANT; return false; } @@ -939,18 +933,22 @@ inline uint32_t mcsv1Context::getUserDataSize() const return fUserDataSize; } -inline std::vector& mcsv1Context::getDataFlags() +inline uint32_t* mcsv1Context::getDataFlags() { - return *dataFlags; + return dataFlags; } -inline void mcsv1Context::setDataFlags(std::vector* flags) +inline void mcsv1Context::setDataFlags(uint32_t* flags) { dataFlags = flags; } -inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, - std::vector& valsDropped) +inline void mcsv1Context::setParamCount(int32_t paramCount) +{ + fParamCount = paramCount; +} + +inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { return NOT_IMPLEMENTED; } diff --git a/utils/udfsdk/median.cpp b/utils/udfsdk/median.cpp index e32d721f1..9c7e72dc3 100644 --- a/utils/udfsdk/median.cpp +++ b/utils/udfsdk/median.cpp @@ -25,9 +25,9 @@ using namespace mcsv1sdk; mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* 
colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -35,13 +35,13 @@ mcsv1_UDAF::ReturnCode median::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("median() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -65,8 +65,7 @@ mcsv1_UDAF::ReturnCode median::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode median::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; @@ -212,8 +211,7 @@ mcsv1_UDAF::ReturnCode median::evaluate(mcsv1Context* context, static_any::any& return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode median::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; MEDIAN_DATA& data = static_cast(context->getUserData())->mData; diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h index d64792461..142be6ba8 100644 --- a/utils/udfsdk/median.h +++ b/utils/udfsdk/median.h @@ -134,7 +134,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -169,8 +169,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. 
*/ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -246,8 +245,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); /** * createUserData() diff --git a/utils/udfsdk/ssq.cpp b/utils/udfsdk/ssq.cpp index 4d9ef7e10..20fdc33db 100644 --- a/utils/udfsdk/ssq.cpp +++ b/utils/udfsdk/ssq.cpp @@ -34,9 +34,9 @@ struct ssq_data #define OUT_TYPE int64_t mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, - COL_TYPES& colTypes) + ColumnDatum* colTypes) { - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -44,13 +44,13 @@ mcsv1_UDAF::ReturnCode ssq::init(mcsv1Context* context, return mcsv1_UDAF::ERROR; } - if (colTypes.size() > 1) + if (context->getParameterCount() > 1) { context->setErrorMessage("ssq() with more than 1 argument"); return mcsv1_UDAF::ERROR; } - if (!(isNumeric(colTypes[0].second))) + if (!(isNumeric(colTypes[0].dataType))) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -81,8 +81,7 @@ mcsv1_UDAF::ReturnCode ssq::reset(mcsv1Context* context) return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, - std::vector& valsIn) +mcsv1_UDAF::ReturnCode ssq::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn = valsIn[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; @@ -183,8 +182,7 @@ mcsv1_UDAF::ReturnCode ssq::evaluate(mcsv1Context* context, static_any::any& val return mcsv1_UDAF::SUCCESS; } -mcsv1_UDAF::ReturnCode 
ssq::dropValue(mcsv1Context* context, - std::vector& valsDropped) +mcsv1_UDAF::ReturnCode ssq::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn = valsDropped[0].columnData; struct ssq_data* data = (struct ssq_data*)context->getUserData()->data; diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h index 514c7a3f0..2cac61c2c 100644 --- a/utils/udfsdk/ssq.h +++ b/utils/udfsdk/ssq.h @@ -114,7 +114,7 @@ public: * mcsv1_UDAF::SUCCESS. */ virtual ReturnCode init(mcsv1Context* context, - COL_TYPES& colTypes); + ColumnDatum* colTypes); /** * reset() @@ -147,8 +147,7 @@ public: * * valsIn (in) - a vector of the parameters from the row. */ - virtual ReturnCode nextValue(mcsv1Context* context, - std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); /** * subEvaluate() @@ -224,8 +223,7 @@ public: * dropValue() will not be called for unbounded/current row type * frames, as those are already optimized. */ - virtual ReturnCode dropValue(mcsv1Context* context, - std::vector& valsDropped); + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); protected: }; diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp index 981651c43..dc0277ccc 100644 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -490,6 +490,168 @@ extern "C" // return data->sumsq; return 0; } + +//======================================================================= + + /** + * regr_avgx connector stub + */ + struct regr_avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_avgx_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_avgx() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + 
data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x and y + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } + +//======================================================================= + + /** + * avgx connector stub. Exactly the same functionality as the + * built in avg() function. 
Use to test the performance of the + * API + */ + struct avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct avgx_data* data; + if (args->arg_count != 1) + { + strcpy(message,"avgx() requires one argument"); + return 1; + } + + if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // TODO test for NULL in x and y + struct avgx_data* data = (struct avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct avgx_data* data = (struct avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } } // vim:ts=4 sw=4: diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index 664b0e7de..fe1f3fd0e 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -204,8 +204,10 @@ Filters="*.c;*.C;*.cc;*.cpp;*.cp;*.cxx;*.c++;*.prg;*.pas;*.dpr;*.asm;*.s;*.bas;*.java;*.cs;*.sc;*.e;*.cob;*.html;*.rc;*.tcl;*.py;*.pl;*.d"> + + @@ -215,8 +217,10 @@ 
Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + + diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index f302c49cd..5cd5243c5 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -52,6 +52,7 @@ using namespace joblist; namespace windowfunction { + template boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) { @@ -142,7 +143,7 @@ template void WF_udaf::resetData() { getContext().getFunction()->reset(&getContext()); - fSet.clear(); + fDistinctSet.clear(); WindowFunctionType::resetData(); } @@ -150,8 +151,8 @@ template void WF_udaf::parseParms(const std::vector& parms) { bRespectNulls = true; - // parms[1]: respect null | ignore null - ConstantColumn* cc = dynamic_cast(parms[1].get()); + // The last parms: respect null | ignore null + ConstantColumn* cc = dynamic_cast(parms[parms.size()-1].get()); idbassert(cc != NULL); bool isNull = false; // dummy, harded coded bRespectNulls = (cc->getIntVal(fRow, isNull) > 0); @@ -167,52 +168,71 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) } mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - uint64_t colOut = fFieldIndex[0]; - uint64_t colIn = fFieldIndex[1]; - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. 
+ getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); + + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } for (int64_t i = b; i < e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; + bool bHasNull = false; fRow.setData(getPointer(fRowData->at(i))); // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; + uint32_t flags[getContext().getParameterCount()]; - if (fRow.isNullValue(colIn) == true) + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) { - continue; + if (!bRespectNulls) + { + bHasNull = true; + break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - flag |= mcsv1sdk::PARAM_IS_NULL; + T valIn; + getValue(colIn, valIn, &datum.dataType); + + // Check for distinct, if turned on. + // Currently, distinct only works for param 1 + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + + datum.columnData = valIn; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. - // TODO: when we impliment distinct, we need to revist this. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) + if (bHasNull) { continue; } - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - rc = getContext().getFunction()->dropValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED) @@ -442,59 +462,191 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) else if (fPrev <= e && fPrev > c) e = c; - uint64_t colIn = fFieldIndex[1]; + // Turn on the Analytic flag so the function is aware it is being called + // as a Window Function. + getContext().setContextFlag(mcsv1sdk::CONTEXT_IS_ANALYTIC); - mcsv1sdk::ColumnDatum datum; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colOut); + // Put the parameter metadata (type, scale, precision) into valsIn + mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) + { + uint64_t colIn = fFieldIndex[i+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[i]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } if (b <= c && c <= e) getContext().setContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); else getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); - + bool bHasNull = false; for (int64_t i = b; i <= e; i++) { if (i % 1000 == 0 && fStep->cancelled()) break; fRow.setData(getPointer(fRowData->at(i))); - // Turn on NULL flags - std::vector flags; - uint32_t flag = 0; - if (fRow.isNullValue(colIn) == true) + // NULL flags + uint32_t flags[getContext().getParameterCount()]; + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - if (!bRespectNulls) + uint64_t colIn = fFieldIndex[k+1]; + mcsv1sdk::ColumnDatum& datum = valsIn[k]; + + // Turn on Null flags or skip based on respect nulls + flags[k] = 0; + if (fRow.isNullValue(colIn) == true) + { + if (!bRespectNulls) + { + bHasNull = true; + 
break; + } + + flags[k] |= mcsv1sdk::PARAM_IS_NULL; + } + + // MCOL-1201 Multi-Paramter calls + switch (datum.dataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + { + int64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::UDECIMAL: + { + uint64_t valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + double valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + float valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + getValue(colIn, valIn); + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } + + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[i] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } + } + // Skip if any value is NULL and respect nulls is off. + if (bHasNull) { continue; } - - flag |= mcsv1sdk::PARAM_IS_NULL; } - - flags.push_back(flag); - getContext().setDataFlags(&flags); - - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. 
- if ((fDistinct) || (fSet.find(valIn) != fSet.end())) - { - continue; - } - - if (fDistinct) - fSet.insert(valIn); - - datum.columnData = valIn; - - std::vector valsIn; - valsIn.push_back(datum); - + getContext().setDataFlags(flags); + rc = getContext().getFunction()->nextValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index babb32565..f7a4c4b08 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -21,13 +21,35 @@ #ifndef UTILS_WF_UDAF_H #define UTILS_WF_UDAF_H -#include +#ifndef _MSC_VER +#include +#else +#include +#endif #include "windowfunctiontype.h" #include "mcsv1_udaf.h" namespace windowfunction { +// Hash classes for the distinct hashmap +class DistinctHasher +{ +public: + inline size_t operator()(const static_any::any& a) const + { + return a.getHash(); + } +}; + +class DistinctEqual +{ +public: + inline bool operator()(const static_any::any& lhs, static_any::any& rhs) const + { + return lhs == rhs; + } +}; // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF @@ -72,7 +94,8 @@ protected: bool fDistinct; bool bRespectNulls; // respect null | ignore null bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue. 
- std::set fSet; // To hold distinct values + // To hold distinct values + std::tr1::unordered_set fDistinctSet; static_any::any fValOut; // The return value public: diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index 950045899..4c5b4de32 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -492,10 +492,10 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) static uint64_t dateNull = joblist::DATENULL; static uint64_t datetimeNull = joblist::DATETIMENULL; static uint64_t timeNull = joblist::TIMENULL; - static uint64_t char1Null = joblist::CHAR1NULL; - static uint64_t char2Null = joblist::CHAR2NULL; - static uint64_t char4Null = joblist::CHAR4NULL; - static uint64_t char8Null = joblist::CHAR8NULL; +// static uint64_t char1Null = joblist::CHAR1NULL; +// static uint64_t char2Null = joblist::CHAR2NULL; +// static uint64_t char4Null = joblist::CHAR4NULL; +// static uint64_t char8Null = joblist::CHAR8NULL; static string stringNull(""); void* v = NULL; diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 7cd275021..71d0e1fbd 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1280,7 +1280,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -2025,10 +2025,10 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 0; k < colStructList.size(); k++) + for (size_t k = 0; k < colStructList.size(); k++) { // Skip the selected column 
- if (k == colId) + if (k == (size_t)colId) continue; Column expandCol; @@ -2583,7 +2583,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; @@ -3278,7 +3278,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k = 1; k < colStructList.size(); k++) + for (size_t k = 1; k < colStructList.size(); k++) { Column expandCol; colOp = m_colOp[op(colStructList[k].fCompressionType)]; From c67ac7699e4c9080e79b18fee6a25d8956e628e2 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:53:49 -0500 Subject: [PATCH 033/123] MCOL-1201 Add test UDAF back in after rebase --- utils/udfsdk/avgx.cpp | 257 +++++++++++++++++++++++++++++++++++ utils/udfsdk/avgx.h | 99 ++++++++++++++ utils/udfsdk/regr_avgx.cpp | 270 +++++++++++++++++++++++++++++++++++++ utils/udfsdk/regr_avgx.h | 99 ++++++++++++++ 4 files changed, 725 insertions(+) create mode 100644 utils/udfsdk/avgx.cpp create mode 100644 utils/udfsdk/avgx.h create mode 100644 utils/udfsdk/regr_avgx.cpp create mode 100644 utils/udfsdk/regr_avgx.h diff --git a/utils/udfsdk/avgx.cpp b/utils/udfsdk/avgx.cpp new file mode 100644 index 000000000..887a8418e --- /dev/null +++ b/utils/udfsdk/avgx.cpp @@ -0,0 +1,257 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include +#include "avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 1) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with other than 1 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[0].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[0].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode avgx::reset(mcsv1Context* context) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_x = valsIn[0].columnData; + struct avgx_data* data = (struct 
avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct avgx_data* outData = (struct avgx_data*)context->getUserData()->data; + struct avgx_data* inData = (struct avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + + valOut = data->sum / (double)data->cnt; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_x = valsDropped[0].columnData; + struct avgx_data* data = (struct avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/avgx.h b/utils/udfsdk/avgx.h new file mode 100644 index 000000000..0569b6091 --- /dev/null +++ b/utils/udfsdk/avgx.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* avgx.h +***********************************************************************/ + +/** + * Columnstore interface for the avgx function + * + * + * CREATE AGGREGATE FUNCTION avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_avgx +#define HEADER_avgx + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+ +// Return the avgx value of the dataset + +class avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + avgx() : mcsv1_UDAF() {}; + virtual ~avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_.h + diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/udfsdk/regr_avgx.cpp new file mode 100644 index 000000000..c7cc5b56e --- /dev/null +++ b/utils/udfsdk/regr_avgx.cpp @@ -0,0 +1,270 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include +#include +#include +#include "regr_avgx.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +#define DATATYPE double + +// Use the simple data model +struct regr_avgx_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode regr_avgx::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 2) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with other than 2 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(regr_avgx_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[1].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode regr_avgx::reset(mcsv1Context* context) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_y = valsIn[0].columnData; + static_any::any& valIn_x = valsIn[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (context->isParamNull(0) || context->isParamNull(1)) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct regr_avgx_data* outData = (struct regr_avgx_data*)context->getUserData()->data; + struct regr_avgx_data* inData = (struct regr_avgx_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + + if (data->cnt == 0) + { + valOut = 0; + } + else + { + valOut = data->sum / (double)data->cnt; + } + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_y = valsDropped[0].columnData; + static_any::any& valIn_x = valsDropped[1].columnData; + struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_x.compatible(charTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(scharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(shortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(intTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(longTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(llTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ucharTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ushortTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(uintTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ulongTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(ullTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(floatTypeId)) + { + val = valIn_x.cast(); + } + else if (valIn_x.compatible(doubleTypeId)) + { + val = valIn_x.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[1].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/regr_avgx.h b/utils/udfsdk/regr_avgx.h new file mode 100644 index 000000000..f70f30d8c --- /dev/null +++ b/utils/udfsdk/regr_avgx.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* regr_avgx.h +***********************************************************************/ + +/** + * Columnstore interface for the regr_avgx function + * + * + * CREATE AGGREGATE FUNCTION regr_avgx returns REAL soname + * 'libudf_mysql.so'; + * + */ +#ifndef HEADER_regr_avgx +#define HEADER_regr_avgx + +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+ +// Return the regr_avgx value of the dataset + +class regr_avgx : public mcsv1_UDAF +{ +public: + // Defaults OK + regr_avgx() : mcsv1_UDAF() {}; + virtual ~regr_avgx() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_regr_avgx.h + From 06e9772310005c3f824652ae8fbc5dc87d6f84e6 Mon Sep 17 00:00:00 2001 From: David Hall Date: Mon, 14 May 2018 17:28:24 -0500 Subject: [PATCH 034/123] MCOL-1201 some fixes from testing --- dbcon/joblist/tupleaggregatestep.cpp | 229 ++++++++++++--------------- dbcon/mysql/ha_calpont_execplan.cpp | 1 - utils/common/common.vpj | 2 + utils/rowgroup/rowaggregation.cpp | 4 +- 4 files changed, 106 insertions(+), 130 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index ff490da5b..8f7755ad9 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -852,7 +852,6 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) if (ac->aggOp() == ROWAGG_UDAF) { UDAFColumn* udafc = dynamic_cast(ac); - if (udafc) { constAggDataVec.push_back( @@ -1097,8 +1096,9 @@ void TupleAggregateStep::prep1PhaseAggregate( vector functionVec; uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); + // For UDAF uint32_t projColsUDAFIndex = 0; - + UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function map avgFuncMap; @@ -1287,12 +1287,10 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { 
std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -1300,12 +1298,10 @@ void TupleAggregateStep::prep1PhaseAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, i)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -1474,8 +1470,6 @@ void TupleAggregateStep::prep1PhaseAggregate( throw logic_error("(2)prep1PhaseAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } - pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); - // Return column oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(key); @@ -1677,8 +1671,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); // map key = column key, operation (enum), and UDAF pointer if UDAF. 
AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; set avgSet; + + // fOR udaf + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; uint32_t projColsUDAFIndex = 0; // for count column of average function @@ -1847,7 +1844,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; if (udafc) @@ -1857,12 +1854,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAgg)); break; } - } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -2142,6 +2137,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // locate the return column position in aggregated rowgroup for (uint64_t i = 0; i < returnedColVec.size(); i++) { + udafc = NULL; pUDAFFunc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); @@ -2150,10 +2146,21 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } if 
(find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -2473,26 +2480,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep1PhaseDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); } else { @@ -2904,7 +2892,10 @@ void TupleAggregateStep::prep2PhasesAggregate( vector > aggColVec; set avgSet; vector >& returnedColVec = jobInfo.returnedColVec; + // For UDAF uint32_t projColsUDAFIndex = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -2947,7 +2938,6 @@ void TupleAggregateStep::prep2PhasesAggregate( uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns @@ -3084,12 +3074,10 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3098,10 +3086,9 @@ void 
TupleAggregateStep::prep2PhasesAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -3350,10 +3337,6 @@ void TupleAggregateStep::prep2PhasesAggregate( // add back sum or count(column name) if omitted due to avg column // put count(column name) column to the end, if it is for avg only { - // Keep a count of the parms after the first for any aggregate. - // These will be skipped and the count needs to be subtracted - // from where the aux column will be. - int64_t multiParms = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap; @@ -3369,6 +3352,8 @@ void TupleAggregateStep::prep2PhasesAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { uint32_t retKey = returnedColVec[i].first; @@ -3379,19 +3364,30 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_MULTI_PARM) { // Skip on UM: Extra parms for an aggregate have no work on the UM - ++multiParms; continue; } + // Is this a UDAF? use the function as part of the key. 
- - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; - + pUDAFFunc = NULL; + udafc = NULL; if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); @@ -3492,7 +3488,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep2PhasesAggregate: " << emsg << " oid=" @@ -3514,7 +3510,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3525,7 +3521,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (jobInfo.distinctColVec[j] == retKey) { if (groupByUm[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByUm[j]->fOutputColumnIndex = i; + groupByUm[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByUm[j]->fOutputColumnIndex; } @@ -3534,7 +3530,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + 
ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -3542,30 +3538,11 @@ void TupleAggregateStep::prep2PhasesAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, i-multiParms)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, i-multiParms)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colPm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -3600,6 +3577,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnedColVec[i].second == AggregateColumn::AVG) avgFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // now fix the AVG function, locate the count(column) position @@ -3617,7 +3595,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size() - multiParms; + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -3724,7 +3702,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector > aggColVec, aggNoDistColVec; set avgSet, avgDistSet; vector >& returnedColVec = jobInfo.returnedColVec; + // For UDAF uint32_t projColsUDAFIndex = 0; + UDAFColumn* udafc = NULL; + mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -3796,7 
+3777,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); map, uint64_t> avgFuncDistMap; AGG_MAP aggFuncMap; - mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // associate the columns between projected RG and aggregate RG on PM // populated the aggregate columns @@ -3940,12 +3920,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) { - UDAFColumn* udafc = dynamic_cast((*it).get()); + udafc = dynamic_cast((*it).get()); projColsUDAFIndex++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3954,10 +3932,9 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { - throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); + throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } else @@ -4201,32 +4178,33 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // associate the columns between the aggregate RGs on PM and UM without distinct aggregator // populated the returned columns { + int64_t multiParms = 0; + for (uint32_t idx = 0; idx < groupByPm.size(); idx++) { SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(idx, idx)); groupByUm.push_back(groupby); } - // Keep a count of the parms after the first for any aggregate. - // These will be skipped and the count needs to be subtracted - // from where the aux column will be. 
- int64_t multiParms = 0; for (uint32_t idx = 0; idx < functionVecPm.size(); idx++) - { SP_ROWAGG_FUNC_t funct; SP_ROWAGG_FUNC_t funcPm = functionVecPm[idx]; - // UDAF support if (funcPm->fAggFunction == ROWAGG_MULTI_PARM) { - // Multi-Parm is not used on the UM + // Skip on UM: Extra parms for an aggregate have no work on the UM ++multiParms; continue; } + if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); + if (!udafFuncCol) + { + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + } funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, @@ -4273,6 +4251,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // These will be skipped and the count needs to be subtracted // from where the aux column will be. int64_t multiParms = 0; + projColsUDAFIndex = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. 
map avgFuncMap, avgDistFuncMap; @@ -4286,9 +4265,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // locate the return column position in aggregated rowgroup from PM + // outIdx is i without the multi-columns, + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { pUDAFFunc = NULL; + udafc = NULL; uint32_t retKey = returnedColVec[i].first; RowAggFunctionType aggOp = functionIdMap(returnedColVec[i].second); @@ -4304,10 +4286,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - UDAFColumn* udafc = dynamic_cast(jobInfo.projectionCols[i].get()); - - if (udafc) - pUDAFFunc = udafc->getContext().getFunction(); + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIndex++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } } if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -4436,7 +4429,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second - multiParms; + colUm = it->second; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); @@ -4460,7 +4453,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - colUm = it->second - multiParms; + colUm = it->second; if (aggOp == ROWAGG_SUM) { @@ -4528,7 +4521,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, 
args); cerr << "prep2PhasesDistinctAggregate: " << emsg << " oid=" @@ -4552,7 +4545,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -4561,7 +4554,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // a duplicate group by column if (dupGroupbyIndex != -1) functionVecUm.push_back(SP_ROWAGG_FUNC_t(new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -4569,30 +4562,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - - for (; it != jobInfo.projectionCols.end(); it++) - { - UDAFColumn* udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - // Create a RowAggFunctionCol (UDAF subtype) with the context. 
- funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, i-multiParms)); - break; - } - } - - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no/not enough UDAFColumn/-s"); - } + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, outIdx)); } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, i-multiParms)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colUm, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -4629,6 +4603,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -4646,7 +4621,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size() - multiParms; + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -4706,7 +4681,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (!udafFuncCol) { - throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(5)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } functionVecUm[i]->fAuxColumnIndex = lastCol++; diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 9150d5393..c1706eee7 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4573,7 +4573,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) udafc->resultType(ct); } } - } catch (std::logic_error e) { diff --git a/utils/common/common.vpj b/utils/common/common.vpj index 69059884c..ea67e04ba 100755 --- a/utils/common/common.vpj +++ 
b/utils/common/common.vpj @@ -200,6 +200,7 @@ + @@ -208,6 +209,7 @@ Name="Header Files" Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if"> + diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index c1f5bbd63..043dcaac2 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -2015,13 +2015,13 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) { - int32_t paramCount = fRGContext.getParameterCount(); + uint32_t paramCount = fRGContext.getParameterCount(); // The vector of parameters to be sent to the UDAF mcsv1sdk::ColumnDatum valsIn[paramCount]; uint32_t dataFlags[paramCount]; execplan::CalpontSystemCatalog::ColDataType colDataType; - for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) + for (uint32_t i = 0; i < paramCount; ++i) { mcsv1sdk::ColumnDatum& datum = valsIn[i]; // Turn on NULL flags From c8c3b23e32b676d09604c39798167102b03df4c3 Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 15 May 2018 13:15:45 -0500 Subject: [PATCH 035/123] MCOL-1201 Modify docs. 
Fix group concat bug --- dbcon/mysql/ha_calpont_execplan.cpp | 1 + utils/udfsdk/docs/source/changelog.rst | 1 + .../docs/source/reference/ColumnDatum.rst | 6 ++-- .../docs/source/reference/MariaDBUDAF.rst | 2 +- .../udfsdk/docs/source/reference/UDAFMap.rst | 2 +- .../docs/source/reference/mcsv1Context.rst | 2 +- .../docs/source/reference/mcsv1_UDAF.rst | 36 ++++++++----------- utils/udfsdk/docs/source/usage/cmakelists.rst | 2 +- utils/udfsdk/docs/source/usage/compile.rst | 2 +- utils/udfsdk/docs/source/usage/headerfile.rst | 6 ++-- .../udfsdk/docs/source/usage/introduction.rst | 4 +-- utils/udfsdk/docs/source/usage/sourcefile.rst | 29 +++++++-------- utils/udfsdk/udfsdk.vpj | 33 +++++++++++++++++ 13 files changed, 75 insertions(+), 51 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index c1706eee7..4a86dc218 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4165,6 +4165,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) rowCol->columnVec(selCols); (dynamic_cast(ac))->orderCols(orderCols); parm.reset(rowCol); + ac->aggParms().push_back(parm); if (gc->str_separator()) { diff --git a/utils/udfsdk/docs/source/changelog.rst b/utils/udfsdk/docs/source/changelog.rst index fcd93d54c..1a7c749f9 100644 --- a/utils/udfsdk/docs/source/changelog.rst +++ b/utils/udfsdk/docs/source/changelog.rst @@ -5,4 +5,5 @@ Version History | Version | Date | Changes | +=========+============+=============================+ | 1.1.0α | 2017-08-25 | - First alpha release | +| 1.2.0α | 2016-05-18 | - Add multi parm support | +---------+------------+-----------------------------+ diff --git a/utils/udfsdk/docs/source/reference/ColumnDatum.rst b/utils/udfsdk/docs/source/reference/ColumnDatum.rst index dd1006363..5304a2953 100644 --- a/utils/udfsdk/docs/source/reference/ColumnDatum.rst +++ b/utils/udfsdk/docs/source/reference/ColumnDatum.rst @@ -1,3 +1,5 @@ +.. 
_ColumnDatum: + ColumnDatum =========== @@ -13,7 +15,7 @@ Example for int data: int myint = valIn.cast(); -For multi-paramter aggregations (not available in Columnstore 1.1), the colsIn vector of next_value() contains the ordered set of row parameters. +For multi-parameter aggregations (not available in Columnstore 1.1), the colsIn array of next_value() contains the ordered set of row parameters. For char, varchar, text, varbinary and blob types, columnData will be std::string. @@ -59,7 +61,7 @@ The provided values are: * - SMALLINT - A signed two byte integer * - DECIMAL - - A Columnstore Decimal value. For Columnstore 1.1, this is stored in the smallest integer type field that will hold the required precision. + - A Columnstore Decimal value. This is stored in the smallest integer type field that will hold the required precision. * - MEDINT - A signed four byte integer * - INT diff --git a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst index 1f6fa7acb..d031705d8 100644 --- a/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst +++ b/utils/udfsdk/docs/source/reference/MariaDBUDAF.rst @@ -13,7 +13,7 @@ The library placed in mysql/lib is the name you use in the SQL CREATE AGGREGATE CREATE AGGREGATE FUNCTION ssq returns REAL soname 'libudf_mysql.so'; -Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures. If writing your function for other engines, you must handle allocation and de-alloaction in :ref:`function_init ` and :ref:`function_deinit ` +Unlike the code you write for the Columnstore UDAF, MariaDB does not handle allocation and de-allocation of your memory structures in other engines. 
If writing your function for other engines, you must handle allocation and de-allocation in :ref:`function_init ` and :ref:`function_deinit ` All of the MariaDB UDF and UDAF example functions are in a single source file named udfmysql.cpp and linked into libudf_mysql.so. diff --git a/utils/udfsdk/docs/source/reference/UDAFMap.rst b/utils/udfsdk/docs/source/reference/UDAFMap.rst index 48706bab3..d3cda63f4 100644 --- a/utils/udfsdk/docs/source/reference/UDAFMap.rst +++ b/utils/udfsdk/docs/source/reference/UDAFMap.rst @@ -3,7 +3,7 @@ UDAFMap ======= -The UDAFMap is where we tell the system about our function. For Columnstore 1.1, you must manually place your function into this map. +The UDAFMap is where we tell the system about our function. For Columnstore 1.2, you must manually place your function into this map. * open mcsv1_udaf.cpp * add your header to the #include list diff --git a/utils/udfsdk/docs/source/reference/mcsv1Context.rst b/utils/udfsdk/docs/source/reference/mcsv1Context.rst index 279220fb3..02adf57ab 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1Context.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1Context.rst @@ -150,7 +150,7 @@ Use these to determine the way your UDA(n)F was called .. c:function:: size_t getParameterCount() const; -:returns: the number of parameters to the function in the SQL query. Columnstore 1.1 only supports one parameter. +:returns: the number of parameters to the function in the SQL query. .. c:function:: bool isParamNull(int paramIdx); diff --git a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst index 73c8f6570..f75fe73fc 100644 --- a/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst +++ b/utils/udfsdk/docs/source/reference/mcsv1_UDAF.rst @@ -1,4 +1,4 @@ -.. _ mcsv1_udaf: +.. _mcsv1_udaf: mcsv1_UDAF ========== @@ -11,12 +11,14 @@ The base class has no data members. It is designed to be only a container for yo However, adding static const members makes sense. 
-For UDAF (not Wndow Functions) Aggregation takes place in three stages: +For UDAF (not Window Functions) Aggregation takes place in three stages: * Subaggregation on the PM. nextValue() * Consolodation on the UM. subevaluate() * Evaluation of the function on the UM. evaluate() +There are situations where the system makes a choice to perform all UDAF calculations on the UM. The presence of group_concat() in the query and certain joins can cause the optimizer to make this choice. + For Window Functions, all aggregation occurs on the UM, and thus the subevaluate step is skipped. There is an optional dropValue() function that may be added. * Aggregation on the UM. nextValue() @@ -80,17 +82,11 @@ Callback Methods .. _init: -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. - - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. +:param colTypes: A list of ColumnDatum structures. Use this to access the column types of the parameters. colTypes.columnData will be invalid. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -116,25 +112,23 @@ Callback Methods .. _nextvalue: -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. - +:param valsIn: an array representing the values to be added for each parameter for this row. 
+ :returns: ReturnCode::ERROR or ReturnCode::SUCCESS Use context->getUserData() and type cast it to your UserData type or Simple Data Model stuct. nextValue() is called for each Window movement that passes the WHERE and HAVING clauses. The context's UserData will contain values that have been sub-aggregated to this point for the group, partition or Window Frame. nextValue is called on the PM for aggregation and on the UM for Window Functions. - When used in an aggregate, the function may not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. + When used in an aggregate, the function should not rely on order or completeness since the sub-aggregation is going on at the PM, it only has access to the data stored on the PM's dbroots. - When used as a analytic function (Window Function), nextValue is call for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. + When used as a analytic function (Window Function), nextValue is called for each Window movement in the Window. If dropValue is defined, then it may be called for every value leaving the Window, and nextValue called for each new value entering the Window. - Since this is called for every row, it is important that this method be efficient. + Since this may called for every row, it is important that this method be efficient. .. _subevaluate: @@ -172,13 +166,11 @@ Callback Methods .. _dropvalue: -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call -:param valsDropped: a vector representing the values to be dropped for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. 
+:param valsDropped: an array representing the values to be dropped for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS diff --git a/utils/udfsdk/docs/source/usage/cmakelists.rst b/utils/udfsdk/docs/source/usage/cmakelists.rst index 32a218459..a7ddacbaf 100644 --- a/utils/udfsdk/docs/source/usage/cmakelists.rst +++ b/utils/udfsdk/docs/source/usage/cmakelists.rst @@ -3,7 +3,7 @@ CMakeLists.txt ============== -For Columnstore 1.1, you compile your function by including it in the CMakeLists.txt file for the udfsdk. +For Columnstore 1.2, you compile your function by including it in the CMakeLists.txt file for the udfsdk. You need only add the new .cpp files to the udfsdk_LIB_SRCS target list:: diff --git a/utils/udfsdk/docs/source/usage/compile.rst b/utils/udfsdk/docs/source/usage/compile.rst index e6319e45b..b96af5d80 100644 --- a/utils/udfsdk/docs/source/usage/compile.rst +++ b/utils/udfsdk/docs/source/usage/compile.rst @@ -3,7 +3,7 @@ Compile ======= -To compile your function for Columnstore 1.1, simple recompile the udfsdk directory:: +To compile your function for Columnstore 1.2, simply recompile the udfsdk directory:: cd utils/usdsdk cmake . diff --git a/utils/udfsdk/docs/source/usage/headerfile.rst b/utils/udfsdk/docs/source/usage/headerfile.rst index 720acc5be..afb043e98 100644 --- a/utils/udfsdk/docs/source/usage/headerfile.rst +++ b/utils/udfsdk/docs/source/usage/headerfile.rst @@ -5,7 +5,7 @@ Header file Usually, each UDA(n)F function will have one .h and one .cpp file plus code for the mariadb UDAF plugin which may or may not be in a separate file. It is acceptable to put a set of related functions in the same files or use separate files for each. -The easiest way to create these files is to copy them an example closest to the type of function you intend to create. +The easiest way to create these files is to copy them from an example closest to the type of function you intend to create. 
Your header file must have a class defined that will implement your function. This class must be derived from mcsv1_UDAF and be in the mcsv1sdk namespace. The following examples use the "allnull" UDAF. @@ -29,9 +29,9 @@ allnull uses the Simple Data Model. See :ref:`complexdatamodel` to see how that allnull() : mcsv1_UDAF(){}; virtual ~allnull(){}; - virtual ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); virtual ReturnCode reset(mcsv1Context* context); - virtual ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn); virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); }; diff --git a/utils/udfsdk/docs/source/usage/introduction.rst b/utils/udfsdk/docs/source/usage/introduction.rst index 6b3544a1e..19c612caa 100644 --- a/utils/udfsdk/docs/source/usage/introduction.rst +++ b/utils/udfsdk/docs/source/usage/introduction.rst @@ -3,7 +3,7 @@ mcsv1_udaf Introduction mcsv1_udaf is a C++ API for writing User Defined Aggregate Functions (UDAF) and User Defined Analytic Functions (UDAnF) for the MariaDB Columstore engine. -In Columnstore 1.1.0, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. +In Columnstore 1.2, functions written using this API must be compiled into the udfsdk and udf_mysql libraries of the Columnstore code branch. The API has a number of features. The general theme is, there is a class that represents the function, there is a context under which the function operates, and there is a data store for intermediate values. @@ -18,5 +18,5 @@ The steps required to create a function are: * :ref:`Compile udfsdk `. * :ref:`Copy the compiled libraries ` to the working directories. 
-In 1.1.0, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. +In 1.2, Columnstore does not have a plugin framework, so the functions have to be compiled into the libraries that Columnstore already loads. diff --git a/utils/udfsdk/docs/source/usage/sourcefile.rst b/utils/udfsdk/docs/source/usage/sourcefile.rst index b7ed38a32..5c43f29e4 100644 --- a/utils/udfsdk/docs/source/usage/sourcefile.rst +++ b/utils/udfsdk/docs/source/usage/sourcefile.rst @@ -34,21 +34,17 @@ Or, if using the :ref:`complexdatamodel`, type cast the UserData to your UserDat init() ------ -.. c:function:: ReturnCode init(mcsv1Context* context, COL_TYPES& colTypes); +.. c:function:: ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); :param context: The context object for this call. -:param colTypes: A list of the column types of the parameters. +:param colTypes: A list of the ColumnDatum used to access column types of the parameters. In init(), the columnData member is invalid. - COL_TYPES is defined as:: - - typedef std::vector >COL_TYPES; - - see :ref:`ColDataTypes `. In Columnstore 1.1, only one column is supported, so colTyoes will be of length one. + see :ref:`ColumnDatum`. In Columnstore 1.2, An arbitrary number of parameters is supported. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS -The init() method is where you sanity check the input, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. +The init() method is where you sanity check the input datatypes, set the output type and set any run flags for this instance. init() is called one time from the mysqld process. All settings you do here are propagated through the system. init() is the exception to type casting the UserData member of context. 
UserData has not been created when init() is called, so you shouldn't use it here. @@ -60,13 +56,14 @@ If you're using :ref:`simpledatamodel`, you need to set the size of the structur .. rubric:: Check parameter count and type -Each function expects a certain number of columns to entered as parameters in the SQL query. For columnstore 1.1, the number of parameters is limited to one. +Each function expects a certain number of columns to be entered as parameters in the SQL query. It is possible to create a UDAF that accepts a variable number of parameters. You can discover which ones were actually used in init(), and modify your function's behavior accordingly. -colTypes is a vector of each parameter name and type. The name is the colum name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your functions behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +colTypes is an array of ColumnData from which can be gleaned the type and name. The name is the column name from the SQL query. You can use this information to sanity check for compatible type(s) and also to modify your function's behavior based on type. To do this, add members to your data struct to be tested in the other Methods. Set these members based on colDataTypes (:ref:`ColDataTypes `). +The actual number of parameters passed can be gotten from context->getParameterCount(). :: - if (colTypes.size() < 1) + if (context->getParameterCount() < 1) { // The error message will be prepended with // "The storage engine for the table doesn't support " @@ -84,7 +81,7 @@ When you create your function using the SQL CREATE FUNCTION command, you must in .. 
rubric:: Set width and scale -If you have secial requirements, especially if you might be dealing with decimal types:: +If you have special requirements, especially if you might be dealing with decimal types:: context->setColWidth(8); context->setScale(context->getScale()*2); @@ -117,13 +114,11 @@ This function may be called multiple times from both the UM and the PM. Make no nextValue() ----------- -.. c:function:: ReturnCode nextValue(mcsv1Context* context, std::vector& valsIn); +.. c:function:: ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); :param context: The context object for this call -:param valsIn: a vector representing the values to be added for each parameter for this row. - - In Columnstore 1.1, this will be a vector of length one. +:param valsIn: an array representing the values to be added for each parameter for this row. :returns: ReturnCode::ERROR or ReturnCode::SUCCESS @@ -208,7 +203,7 @@ For AVG, you might see:: dropValue --------- -.. c:function:: ReturnCode dropValue(mcsv1Context* context, std::vector& valsDropped); +.. c:function:: ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); :param context: The context object for this call diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index fe1f3fd0e..3d3ac39ca 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -238,5 +238,38 @@ N="Makefile" Type="Makefile"/> + + + + + + + + + + + + + + + + + + + + + + + + + + + From ec3a3846c3d66d79170a0b49244c435bc02b21f2 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 11 May 2018 09:50:10 -0500 Subject: [PATCH 036/123] MCOL-1201 manual rebase with develop. 
Obsoletes branch MCOL-1201 --- dbcon/joblist/tupleaggregatestep.cpp | 234 +++++--- dbcon/mysql/ha_calpont_execplan.cpp | 778 ++++++++++++++------------- utils/rowgroup/rowaggregation.cpp | 204 ++++--- 3 files changed, 707 insertions(+), 509 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 8f7755ad9..be0e2009d 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -1097,7 +1097,8 @@ void TupleAggregateStep::prep1PhaseAggregate( uint32_t bigIntWidth = sizeof(int64_t); uint32_t bigUintWidth = sizeof(uint64_t); // For UDAF - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function @@ -1139,6 +1140,7 @@ void TupleAggregateStep::prep1PhaseAggregate( // populate the aggregate rowgroup AGG_MAP aggFuncMap; + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { @@ -1156,8 +1158,9 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(ti.dtype); widthAgg.push_back(ti.width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - aggOp, stats, 0, i, jobInfo.cntStarPos)); + aggOp, stats, 0, outIdx, jobInfo.cntStarPos)); functionVec.push_back(funct); + ++outIdx; continue; } @@ -1173,9 +1176,10 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(ti.dtype); widthAgg.push_back(width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - aggOp, stats, 0, i, -1)); + aggOp, stats, 0, outIdx, -1)); functionVec.push_back(funct); + ++outIdx; continue; } @@ -1221,16 +1225,17 @@ void TupleAggregateStep::prep1PhaseAggregate( widthAgg.push_back(width[colProj]); if (groupBy[it->second]->fOutputColumnIndex == (uint32_t) - 1) - groupBy[it->second]->fOutputColumnIndex = i; + groupBy[it->second]->fOutputColumnIndex = outIdx; else functionVec.push_back(SP_ROWAGG_FUNC_t( new RowAggFunctionCol( ROWAGG_DUP_FUNCT, 
ROWAGG_FUNCT_UNDEFINE, -1, - i, + outIdx, groupBy[it->second]->fOutputColumnIndex))); + ++outIdx; continue; } else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), key) != @@ -1243,6 +1248,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(ti.precision); typeAgg.push_back(ti.dtype); widthAgg.push_back(ti.width); + ++outIdx; continue; } else if (jobInfo.groupConcatInfo.columns().find(key) != @@ -1255,6 +1261,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + ++outIdx; continue; } else if (jobInfo.windowSet.find(key) != jobInfo.windowSet.end()) @@ -1266,6 +1273,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + ++outIdx; continue; } else @@ -1286,16 +1294,16 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); // Create a RowAggFunctionCol (UDAF subtype) with the context. 
- funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, outIdx)); break; } } @@ -1306,7 +1314,7 @@ void TupleAggregateStep::prep1PhaseAggregate( } else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colProj, outIdx)); } functionVec.push_back(funct); @@ -1477,6 +1485,14 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(udafFuncCol->fUDAFContext.getPrecision()); typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -1488,6 +1504,13 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -1520,6 +1543,11 @@ void TupleAggregateStep::prep1PhaseAggregate( { aggFuncMap.insert(make_pair(boost::make_tuple(key, aggOp, pUDAFFunc), funct->fOutputColumnIndex)); } + + if (aggOp != ROWAGG_MULTI_PARM) + { + ++outIdx; + } } // now fix the AVG function, locate the count(column) position @@ -1671,12 +1699,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( uint32_t bigIntWidth = sizeof(int64_t); // map key = column key, operation (enum), and UDAF pointer if UDAF. 
AGG_MAP aggFuncMap; - set avgSet; +// set avgSet; + list multiParmIndexes; // fOR udaf UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; // for count column of average function map avgFuncMap, avgDistFuncMap; @@ -1825,9 +1855,9 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // skip sum / count(column) if avg is also selected - if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && - (avgSet.find(aggKey) != avgSet.end())) - continue; +// if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && +// (avgSet.find(aggKey) != avgSet.end())) +// continue; if (aggOp == ROWAGG_DISTINCT_SUM || aggOp == ROWAGG_DISTINCT_AVG || @@ -1840,12 +1870,12 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { @@ -2063,7 +2093,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); ++colAgg; - // UDAF Dummy holder for UserData struct + // Column for index of UDAF UserData struct oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(aggKey); scaleAgg.push_back(0); @@ -2071,6 +2101,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(CalpontSystemCatalog::UBIGINT); widthAgg.push_back(sizeof(uint64_t)); funct->fAuxColumnIndex = colAgg++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -2082,7 
+2120,15 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(widthProj[colProj]); + multiParmIndexes.push_back(colAgg); ++colAgg; + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -2122,7 +2168,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. AGG_MAP aggDupFuncMap; - pUDAFFunc = NULL; + projColsUDAFIdx = 0; + int64_t multiParms = 0; // copy over the groupby vector // update the outputColumnIndex if returned @@ -2133,8 +2180,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - projColsUDAFIndex = 0; // locate the return column position in aggregated rowgroup + uint64_t outIdx = 0; for (uint64_t i = 0; i < returnedColVec.size(); i++) { udafc = NULL; @@ -2144,23 +2191,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( RowAggFunctionType stats = statsFuncIdMap(returnedColVec[i].second); int colAgg = -1; - if (aggOp == ROWAGG_UDAF) + if (aggOp == ROWAGG_MULTI_PARM) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; - for (; it != jobInfo.projectionCols.end(); it++) - { - udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; - if (udafc) - { - pUDAFFunc = udafc->getContext().getFunction(); - break; - } - } - if (it == jobInfo.projectionCols.end()) - { - throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); - } + // Skip on final agg.: Extra parms for an aggregate have no work there. 
+ ++multiParms; + continue; } if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != @@ -2188,6 +2223,25 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } } + if (aggOp == ROWAGG_UDAF) + { + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; + for (; it != jobInfo.projectionCols.end(); it++) + { + udafc = dynamic_cast((*it).get()); + projColsUDAFIdx++; + if (udafc) + { + pUDAFFunc = udafc->getContext().getFunction(); + break; + } + } + if (it == jobInfo.projectionCols.end()) + { + throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); + } + } + switch (aggOp) { case ROWAGG_DISTINCT_AVG: @@ -2438,7 +2492,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (returnColMissing) { Message::Args args; - args.add(keyName(i, retKey, jobInfo)); + args.add(keyName(outIdx, retKey, jobInfo)); string emsg = IDBErrorInfo::instance()-> errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); cerr << "prep1PhaseDistinctAggregate: " << emsg << " oid=" @@ -2462,7 +2516,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (jobInfo.groupByColVec[j] == retKey) { if (groupByNoDist[j]->fOutputColumnIndex == (uint32_t) - 1) - groupByNoDist[j]->fOutputColumnIndex = i; + groupByNoDist[j]->fOutputColumnIndex = outIdx; else dupGroupbyIndex = groupByNoDist[j]->fOutputColumnIndex; } @@ -2472,7 +2526,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (dupGroupbyIndex != -1) functionVec2.push_back(SP_ROWAGG_FUNC_t( new RowAggFunctionCol( - ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, i, dupGroupbyIndex))); + ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex))); } else { @@ -2480,11 +2534,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_FUNC_t funct; if (aggOp == ROWAGG_UDAF) { - funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, i)); + funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, outIdx)); 
} else { - funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, i)); + funct.reset(new RowAggFunctionCol(aggOp, stats, colAgg, outIdx)); } if (aggOp == ROWAGG_COUNT_NO_OP) @@ -2521,6 +2575,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i // now fix the AVG function, locate the count(column) position @@ -2538,7 +2593,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // there is avg(k), but no count(k) in the select list - uint64_t lastCol = returnedColVec.size(); + uint64_t lastCol = outIdx; for (map::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++) { @@ -2753,6 +2808,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Keep a count of the parms after the first for any aggregate. + // These will be skipped and the count needs to be subtracted + // from where the aux column will be. 
+ int64_t multiParms = 0; + // tricky part : 2 function vectors // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist @@ -2760,6 +2820,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[i].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } if (returnedColVec[k].first != distinctColKey) continue; @@ -2780,7 +2845,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -2799,9 +2864,15 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( { vector functionSub1 = functionNoDistVec; vector functionSub2; + int64_t multiParms = 0; for (uint64_t k = 0; k < returnedColVec.size(); k++) { + if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) + { + ++multiParms; + continue; + } // search non-distinct functions in functionVec vector::iterator it = functionVec2.begin(); @@ -2817,7 +2888,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if ((f->fOutputColumnIndex == k) && @@ -2839,7 +2910,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -2893,7 +2964,8 @@ void TupleAggregateStep::prep2PhasesAggregate( set avgSet; vector >& returnedColVec = jobInfo.returnedColVec; // For UDAF - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc 
= NULL; @@ -3073,11 +3145,11 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3305,6 +3377,14 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); widthAggPm.push_back(bigUintWidth); funct->fAuxColumnIndex = colAggPm++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -3317,6 +3397,13 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); colAggPm++; + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -3342,7 +3429,7 @@ void TupleAggregateStep::prep2PhasesAggregate( map avgFuncMap; AGG_MAP aggDupFuncMap; - projColsUDAFIndex = 0; + projColsUDAFIdx = 0; // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3372,12 +3459,12 @@ void TupleAggregateStep::prep2PhasesAggregate( udafc = NULL; if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ 
-3703,7 +3790,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( set avgSet, avgDistSet; vector >& returnedColVec = jobInfo.returnedColVec; // For UDAF - uint32_t projColsUDAFIndex = 0; + uint32_t projColsUDAFIdx = 0; + uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; @@ -3919,11 +4007,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -4147,6 +4235,14 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); widthAggPm.push_back(sizeof(uint64_t)); funct->fAuxColumnIndex = colAggPm++; + // If the first param is const + udafcParamIdx = 0; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; break; } @@ -4160,6 +4256,13 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( widthAggPm.push_back(width[colProj]); multiParmIndexes.push_back(colAggPm); colAggPm++; + // If the param is const + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + ++udafcParamIdx; } break; @@ -4208,9 +4311,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fOutputColumnIndex, + udafFuncCol->fOutputColumnIndex-multiParms, udafFuncCol->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); + pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); } else { @@ 
-4218,9 +4322,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fAggFunction, funcPm->fStatsFunction, funcPm->fOutputColumnIndex, - funcPm->fOutputColumnIndex, + funcPm->fOutputColumnIndex-multiParms, funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); + pUDAFFunc = NULL; } } @@ -4251,7 +4356,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // These will be skipped and the count needs to be subtracted // from where the aux column will be. int64_t multiParms = 0; - projColsUDAFIndex = 0; + projColsUDAFIdx = 0; // check if the count column for AVG is also a returned column, // if so, replace the "-1" to actual position in returned vec. map avgFuncMap, avgDistFuncMap; @@ -4286,11 +4391,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { - std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIndex; + std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); - projColsUDAFIndex++; + projColsUDAFIdx++; if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -4436,6 +4541,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( precisionAggDist.push_back(precisionAggUm[colUm]); typeAggDist.push_back(typeAggUm[colUm]); widthAggDist.push_back(widthAggUm[colUm]); + colUm -= multiParms; } // not a direct hit -- a returned column is not already in the RG from PMs @@ -4472,8 +4578,16 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(retKey); scaleAggDist.push_back(0); - precisionAggDist.push_back(19); - typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + if (isUnsigned(typeAggUm[colUm])) + { + precisionAggDist.push_back(20); + typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + } + else + { + precisionAggDist.push_back(19); + typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + } 
widthAggDist.push_back(bigIntWidth); } } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 4a86dc218..d030d1855 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4097,426 +4097,429 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) try { - // special parsing for group_concat - if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - Item_func_group_concat* gc = (Item_func_group_concat*)isp; + // special parsing for group_concat + if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + Item_func_group_concat* gc = (Item_func_group_concat*)isp; vector orderCols; - RowColumn* rowCol = new RowColumn(); + RowColumn* rowCol = new RowColumn(); vector selCols; - uint32_t select_ctn = gc->count_field(); - ReturnedColumn* rc = NULL; + uint32_t select_ctn = gc->count_field(); + ReturnedColumn* rc = NULL; - for (uint32_t i = 0; i < select_ctn; i++) + for (uint32_t i = 0; i < select_ctn; i++) + { + rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); + + if (!rc || gwi.fatalParseError) + { + if (ac) + delete ac; + return NULL; + } + + selCols.push_back(SRCP(rc)); + } + + ORDER** order_item, **end; + + for (order_item = gc->get_order(), + end = order_item + gc->order_field(); order_item < end; + order_item++) + { + Item* ord_col = *(*order_item)->item; + + if (ord_col->type() == Item::INT_ITEM) { - rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); + Item_int* id = (Item_int*)ord_col; + + if (id->val_int() > (int)selCols.size()) + { + gwi.fatalParseError = true; + if (ac) + delete ac; + return NULL; + } + + rc = selCols[id->val_int() - 1]->clone(); + rc->orderPos(id->val_int() - 1); + } + else + { + rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); if (!rc || gwi.fatalParseError) { - if (ac) - delete ac; + if (ac) + delete ac; return NULL; } - - selCols.push_back(SRCP(rc)); } - ORDER** order_item, **end; + // 10.2 TODO: direction is 
now a tri-state flag + rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); + orderCols.push_back(SRCP(rc)); + } - for (order_item = gc->get_order(), - end = order_item + gc->order_field(); order_item < end; - order_item++) - { - Item* ord_col = *(*order_item)->item; - - if (ord_col->type() == Item::INT_ITEM) - { - Item_int* id = (Item_int*)ord_col; - - if (id->val_int() > (int)selCols.size()) - { - gwi.fatalParseError = true; - if (ac) - delete ac; - return NULL; - } - - rc = selCols[id->val_int() - 1]->clone(); - rc->orderPos(id->val_int() - 1); - } - else - { - rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); - - if (!rc || gwi.fatalParseError) - { - if (ac) - delete ac; - return NULL; - } - } - - // 10.2 TODO: direction is now a tri-state flag - rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); - orderCols.push_back(SRCP(rc)); - } - - rowCol->columnVec(selCols); - (dynamic_cast(ac))->orderCols(orderCols); - parm.reset(rowCol); + rowCol->columnVec(selCols); + (dynamic_cast(ac))->orderCols(orderCols); + parm.reset(rowCol); ac->aggParms().push_back(parm); - if (gc->str_separator()) - { - string separator; - separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); - (dynamic_cast(ac))->separator(separator); - } - } - else + if (gc->str_separator()) { - for (uint32_t i = 0; i < isp->argument_count(); i++) + string separator; + separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); + (dynamic_cast(ac))->separator(separator); + } + } + else + { + for (uint32_t i = 0; i < isp->argument_count(); i++) + { + Item* sfitemp = sfitempp[i]; + Item::Type sfitype = sfitemp->type(); + + switch (sfitype) { - Item* sfitemp = sfitempp[i]; - Item::Type sfitype = sfitemp->type(); - - switch (sfitype) + case Item::FIELD_ITEM: { - case Item::FIELD_ITEM: - { - Item_field* ifp = reinterpret_cast(sfitemp); - SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + Item_field* ifp = 
reinterpret_cast(sfitemp); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi); - if (!sc) - { - gwi.fatalParseError = true; - break; - } - - parm.reset(sc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); - TABLE_LIST* tmp = (ifp->cached_table ? ifp->cached_table : 0); - gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); - break; - } - - case Item::INT_ITEM: - case Item::STRING_ITEM: - case Item::REAL_ITEM: - case Item::DECIMAL_ITEM: - { - // treat as count(*) - if (ac->aggOp() == AggregateColumn::COUNT) - ac->aggOp(AggregateColumn::COUNT_ASTERISK); - - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::NULL_ITEM: - { - parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::FUNC_ITEM: - { - Item_func* ifp = (Item_func*)sfitemp; - ReturnedColumn* rc = 0; - - // check count(1+1) case - vector tmpVec; - uint16_t parseInfo = 0; - parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); - - if (parseInfo & SUB_BIT) - { - gwi.fatalParseError = true; - break; - } - else if (!gwi.fatalParseError && - !(parseInfo & AGG_BIT) && - !(parseInfo & AF_BIT) && - tmpVec.size() == 0) - { - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - FunctionColumn* fc = dynamic_cast(rc); - - if ((fc && fc->functionParms().empty()) || !fc) - { - //ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (dynamic_cast(rc)) - { - //@bug5229. handle constant function on aggregate argument - ac->constCol(SRCP(rc)); - break; - } - } - } - - // MySQL carelessly allows correlated aggregate function on the WHERE clause. - // Here is the work around to deal with that inconsistence. 
- // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; - ClauseType clauseType = gwi.clauseType; - - if (gwi.clauseType == WHERE) - gwi.clauseType = HAVING; - - // @bug 3603. for cases like max(rand()). try to build function first. - if (!rc) - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - - parm.reset(rc); - gwi.clauseType = clauseType; - - if (gwi.fatalParseError) - break; - - break; - } - - case Item::REF_ITEM: - { - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (rc) - { - parm.reset(rc); - break; - } - } - - default: + if (!sc) { gwi.fatalParseError = true; - //gwi.parseErrorText = "Non-supported Item in Aggregate function"; + break; + } + + parm.reset(sc); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); + TABLE_LIST* tmp = (ifp->cached_table ? ifp->cached_table : 0); + gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); + break; + } + + case Item::INT_ITEM: + case Item::STRING_ITEM: + case Item::REAL_ITEM: + case Item::DECIMAL_ITEM: + { + // treat as count(*) + if (ac->aggOp() == AggregateColumn::COUNT) + ac->aggOp(AggregateColumn::COUNT_ASTERISK); + parm.reset(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError)); + ac->constCol(parm); + break; + } + + case Item::NULL_ITEM: + { + parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } + + case Item::FUNC_ITEM: + { + Item_func* ifp = (Item_func*)sfitemp; + ReturnedColumn* rc = 0; + + // check count(1+1) case + vector tmpVec; + uint16_t parseInfo = 0; + parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); + + if (parseInfo & SUB_BIT) + { + gwi.fatalParseError = true; + break; + } + else if (!gwi.fatalParseError && + !(parseInfo & AGG_BIT) && + !(parseInfo & AF_BIT) && + tmpVec.size() == 0) + { + rc = 
buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + FunctionColumn* fc = dynamic_cast(rc); + + if ((fc && fc->functionParms().empty()) || !fc) + { + //ac->aggOp(AggregateColumn::COUNT_ASTERISK); + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (dynamic_cast(rc)) + { + //@bug5229. handle constant function on aggregate argument + ac->constCol(SRCP(rc)); + break; + } + } + } + + // MySQL carelessly allows correlated aggregate function on the WHERE clause. + // Here is the work around to deal with that inconsistence. + // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; + ClauseType clauseType = gwi.clauseType; + + if (gwi.clauseType == WHERE) + gwi.clauseType = HAVING; + + // @bug 3603. for cases like max(rand()). try to build function first. + if (!rc) + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + + parm.reset(rc); + gwi.clauseType = clauseType; + + if (gwi.fatalParseError) + break; + + break; + } + + case Item::REF_ITEM: + { + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (rc) + { + parm.reset(rc); + break; } } - if (gwi.fatalParseError) + default: { - if (gwi.parseErrorText.empty()) - { - Message::Args args; + gwi.fatalParseError = true; + //gwi.parseErrorText = "Non-supported Item in Aggregate function"; + } + } - if (item->name) - args.add(item->name); - else - args.add(""); + if (gwi.fatalParseError) + { + if (gwi.parseErrorText.empty()) + { + Message::Args args; - gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); - } + if (item->name) + args.add(item->name); + else + args.add(""); + + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); + } if (ac) delete ac; - return NULL; - } + return NULL; + } if (parm) { // MCOL-1201 multi-argument aggregate ac->aggParms().push_back(parm); } - } } + } // Get result type // Modified for MCOL-1201 multi-argument aggregate if 
(ac->aggParms().size() > 0) - { + { // These are all one parm functions, so we can safely // use the first parm for result type. parm = ac->aggParms()[0]; - if (isp->sum_func() == Item_sum::AVG_FUNC || - isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) + if (isp->sum_func() == Item_sum::AVG_FUNC || + isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) { - CalpontSystemCatalog::ColType ct = parm->resultType(); + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::DECIMAL; + ct.colWidth = 8; + ct.scale += 4; + break; - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::DECIMAL; - ct.colWidth = 8; - ct.scale += 4; - break; +#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; +#endif - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: 
- case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; - #endif - - default: - break; - } - - ac->resultType(ct); + default: + break; } - else if (isp->sum_func() == Item_sum::COUNT_FUNC || - isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::COUNT_FUNC || + isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = parm->resultType().scale; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_FUNC || + isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = parm->resultType().scale; - ac->resultType(ct); + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + ct.colDataType = CalpontSystemCatalog::BIGINT; + + // no break, let fall through + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + ct.colWidth = 8; + break; + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::UBIGINT; + ct.colWidth = 8; + break; + +#if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; +#endif + + default: + break; } - else if (isp->sum_func() == Item_sum::SUM_FUNC || - 
isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - switch (ct.colDataType) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - ct.colDataType = CalpontSystemCatalog::BIGINT; - - // no break, let fall through - - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - ct.colWidth = 8; - break; - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::UBIGINT; - ct.colWidth = 8; - break; - - #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; - #endif - - default: - break; - } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::STD_FUNC || - isp->sum_func() == Item_sum::VARIANCE_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - ct.scale = 0; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = 0; - ct.precision = -16; // borrowed to indicate skip null value check on connector - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - //Item_func_group_concat* gc = (Item_func_group_concat*)isp; - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::VARCHAR; - ct.colWidth = isp->max_length; - ct.precision = 0; - ac->resultType(ct); - } - else - { - // UDAF result type will be set below. 
- ac->resultType(parm->resultType()); - } + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::STD_FUNC || + isp->sum_func() == Item_sum::VARIANCE_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + ct.scale = 0; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = 0; + ct.precision = -16; // borrowed to indicate skip null value check on connector + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + //Item_func_group_concat* gc = (Item_func_group_concat*)isp; + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::VARCHAR; + ct.colWidth = isp->max_length; + ct.precision = 0; + ac->resultType(ct); } else { - ac->resultType(colType_MysqlToIDB(isp)); + // UDAF result type will be set below. + ac->resultType(parm->resultType()); } + } + else + { + ac->resultType(colType_MysqlToIDB(isp)); + } - // adjust decimal result type according to internalDecimalScale - if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) + // adjust decimal result type according to internalDecimalScale + if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) + { + CalpontSystemCatalog::ColType ct = ac->resultType(); + ct.scale = gwi.internalDecimalScale; + ac->resultType(ct); + } + + // check for same aggregate on the select list + ac->expressionId(ci->expressionId++); + + if (gwi.clauseType != SELECT) + { + for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) { - CalpontSystemCatalog::ColType ct = ac->resultType(); - ct.scale = gwi.internalDecimalScale; - ac->resultType(ct); + if (*ac == gwi.returnedCols[i].get()) + ac->expressionId(gwi.returnedCols[i]->expressionId()); } + } - // check for same aggregate on the select list 
- ac->expressionId(ci->expressionId++); - - if (gwi.clauseType != SELECT) + // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will + // be applied in ExeMgr. When the ExeMgr fix is available, this checking + // will be taken out. + if (isp->sum_func() != Item_sum::UDF_SUM_FUNC) { - for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) - { - if (*ac == gwi.returnedCols[i].get()) - ac->expressionId(gwi.returnedCols[i]->expressionId()); - } + if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "No project column found for aggregate function"; + if (ac) + delete ac; + return NULL; + } + else if (ac->constCol()) + { + gwi.count_asterisk_list.push_back(ac); + } } - // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will - // be applied in ExeMgr. When the ExeMgr fix is available, this checking - // will be taken out. - if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) - { - gwi.fatalParseError = true; - gwi.parseErrorText = "No project column found for aggregate function"; - if (ac) - delete ac; - return NULL; - } - else if (ac->constCol()) - { - gwi.count_asterisk_list.push_back(ac); - } - - // For UDAF, populate the context and call the UDAF init() function. + // For UDAF, populate the context and call the UDAF init() function. // The return type is (should be) set in context by init(). 
- if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) + { + UDAFColumn* udafc = dynamic_cast(ac); + + if (udafc) { - UDAFColumn* udafc = dynamic_cast(ac); + mcsv1Context& context = udafc->getContext(); + context.setName(isp->func_name()); - if (udafc) - { - mcsv1Context& context = udafc->getContext(); - context.setName(isp->func_name()); - - // Set up the return type defaults for the call to init() - context.setResultType(udafc->resultType().colDataType); - context.setColWidth(udafc->resultType().colWidth); - context.setScale(udafc->resultType().scale); - context.setPrecision(udafc->resultType().precision); + // Set up the return type defaults for the call to init() + context.setResultType(udafc->resultType().colDataType); + context.setColWidth(udafc->resultType().colWidth); + context.setScale(udafc->resultType().scale); + context.setPrecision(udafc->resultType().precision); context.setParamCount(udafc->aggParms().size()); ColumnDatum colType; @@ -4533,7 +4536,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) colTypes[i] = colType; } - // Call the user supplied init() + // Call the user supplied init() mcsv1sdk::mcsv1_UDAF* udaf = context.getFunction(); if (!udaf) { @@ -4544,37 +4547,37 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) return NULL; } if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR) - { - gwi.fatalParseError = true; - gwi.parseErrorText = udafc->getContext().getErrorMessage(); + { + gwi.fatalParseError = true; + gwi.parseErrorText = udafc->getContext().getErrorMessage(); if (ac) delete ac; - return NULL; - } + return NULL; + } // UDAF_OVER_REQUIRED means that this function is for Window // Function only. Reject it here in aggregate land. 
- if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) - { - gwi.fatalParseError = true; - gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, - context.getName()); + if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, + context.getName()); if (ac) delete ac; - return NULL; - } - - // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; - ct.colDataType = context.getResultType(); - ct.colWidth = context.getColWidth(); - ct.scale = context.getScale(); - ct.precision = context.getPrecision(); - udafc->resultType(ct); + return NULL; } + + // Set the return type as set in init() + CalpontSystemCatalog::ColType ct; + ct.colDataType = context.getResultType(); + ct.colWidth = context.getColWidth(); + ct.scale = context.getScale(); + ct.precision = context.getPrecision(); + udafc->resultType(ct); } } + } catch (std::logic_error e) { gwi.fatalParseError = true; @@ -4744,6 +4747,7 @@ void gp_walk(const Item* item, void* arg) if (isp) { + // @bug 3669. 
trim trailing spaces for the compare value if (isp->result_type() == STRING_RESULT) { String val, *str = isp->val_str(&val); @@ -4754,7 +4758,10 @@ void gp_walk(const Item* item, void* arg) cval.assign(str->ptr(), str->length()); } + size_t spos = cval.find_last_not_of(" "); + if (spos != string::npos) + cval = cval.substr(0, spos + 1); gwip->rcWorkStack.push(new ConstantColumn(cval)); break; @@ -7908,8 +7915,15 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i setError(gwi.thd, ER_INTERNAL_ERROR, gwi.parseErrorText, gwi); return ER_CHECK_NOT_IMPLEMENTED; } - - (*coliter)->aggParms().push_back(minSc); + // Replace the last (presumably constant) object with minSc + if ((*coliter)->aggParms().empty()) + { + (*coliter)->aggParms().push_back(minSc); + } + else + { + (*coliter)->aggParms()[0] = minSc; + } } std::vector::iterator funciter; @@ -8075,9 +8089,9 @@ int cp_get_group_plan(THD* thd, SCSEP& csep, cal_impl_if::cal_group_info& gi) gwi.thd = thd; int status = getGroupPlan(gwi, select_lex, csep, gi); - cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; - cerr << *csep << endl ; - cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; +// cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; +// cerr << *csep << endl ; +// cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; if (status > 0) return ER_INTERNAL_ERROR; diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 043dcaac2..bead74aff 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -1723,17 +1723,7 @@ void RowAggregation::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(pFunctionCol.get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colOut + 1, rowUDAF, i); - } - else - { - throw logic_error("(3)A UDAF function is called but there's no 
RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colOut + 1, i); break; } @@ -2012,31 +2002,60 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu fRow.setLongDoubleField(fRow.getLongDoubleField(colAux + 1) + valIn * valIn, colAux + 1); } -void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) +void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, + int64_t colAux, uint64_t& funcColsIdx) { uint32_t paramCount = fRGContext.getParameterCount(); // The vector of parameters to be sent to the UDAF mcsv1sdk::ColumnDatum valsIn[paramCount]; uint32_t dataFlags[paramCount]; - + ConstantColumn* cc; + bool bIsNull = false; execplan::CalpontSystemCatalog::ColDataType colDataType; for (uint32_t i = 0; i < paramCount; ++i) { + // If UDAF_IGNORE_NULLS is on, bIsNull gets set the first time + // we find a null. We still need to eat the rest of the parameters + // to sync updateEntry + if (bIsNull) + { + ++funcColsIdx; + continue; + } + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; // Turn on NULL flags dataFlags[i] = 0; - if (isNull(&fRowGroupIn, rowIn, colIn) == true) + + // If this particular parameter is a constant, then we need + // to acces the constant value rather than a row value. 
+ cc = NULL; + if (pFunctionCol->fpConstCol) + { + cc = dynamic_cast(pFunctionCol->fpConstCol.get()); + } + + if ((cc && cc->type() == ConstantColumn::NULLDATA) + || (!cc && isNull(&fRowGroupIn, rowIn, colIn) == true)) { if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { - return; + bIsNull = true; + ++funcColsIdx; + continue; } dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; } - - colDataType = fRowGroupIn.getColTypes()[colIn]; - if (!fRGContext.isParamNull(i)) + + if (cc) + { + colDataType = cc->resultType().colDataType; + } + else + { + colDataType = fRowGroupIn.getColTypes()[colIn]; + } + if (!(dataFlags[i] & mcsv1sdk::PARAM_IS_NULL)) { switch (colDataType) { @@ -2045,13 +2064,38 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::MEDINT: case execplan::CalpontSystemCatalog::INT: case execplan::CalpontSystemCatalog::BIGINT: + { + datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + if (cc) + { + datum.columnData = cc->getIntVal(const_cast(rowIn), bIsNull); + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + } + break; + } case execplan::CalpontSystemCatalog::DECIMAL: case execplan::CalpontSystemCatalog::UDECIMAL: { - datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); - datum.scale = fRowGroupIn.getScale()[colIn]; - datum.precision = fRowGroupIn.getPrecision()[colIn]; + datum.dataType = colDataType; + if (cc) + { + datum.columnData = cc->getDecimalVal(const_cast(rowIn), bIsNull).value; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + datum.columnData = rowIn.getIntField(colIn); + datum.scale = fRowGroupIn.getScale()[colIn]; + datum.precision = fRowGroupIn.getPrecision()[colIn]; + } 
break; } @@ -2062,7 +2106,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::UBIGINT: { datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); + if (cc) + { + datum.columnData = cc->getUintVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } break; } @@ -2070,7 +2121,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::UDOUBLE: { datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; - datum.columnData = rowIn.getDoubleField(colIn); + if (cc) + { + datum.columnData = cc->getDoubleVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getDoubleField(colIn); + } break; } @@ -2078,22 +2136,55 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::UFLOAT: { datum.dataType = execplan::CalpontSystemCatalog::FLOAT; - datum.columnData = rowIn.getFloatField(colIn); + if (cc) + { + datum.columnData = cc->getFloatVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getFloatField(colIn); + } break; } case execplan::CalpontSystemCatalog::DATE: + { + datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + if (cc) + { + datum.columnData = cc->getDateIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } + break; + } case execplan::CalpontSystemCatalog::DATETIME: { datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; - datum.columnData = rowIn.getUintField(colIn); + if (cc) + { + datum.columnData = cc->getDatetimeIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getUintField(colIn); + } break; } case execplan::CalpontSystemCatalog::TIME: { datum.dataType = execplan::CalpontSystemCatalog::BIGINT; - datum.columnData = rowIn.getIntField(colIn); + if (cc) 
+ { + datum.columnData = cc->getTimeIntVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getIntField(colIn); + } break; } @@ -2105,7 +2196,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int case execplan::CalpontSystemCatalog::BLOB: { datum.dataType = colDataType; - datum.columnData = rowIn.getStringField(colIn); + if (cc) + { + datum.columnData = cc->getStrVal(const_cast(rowIn), bIsNull); + } + else + { + datum.columnData = rowIn.getStringField(colIn); + } break; } @@ -2147,6 +2245,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColsIdx].get()); rowUDAF->bInterrupted = true; throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } @@ -2443,17 +2542,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); - } - else - { - throw logic_error("(5)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } @@ -3991,17 +4080,7 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); - } - else - { - throw logic_error("(6)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } @@ -4199,20 +4278,20 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // colAux(in) - Where the UDAF userdata resides // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance //------------------------------------------------------------------------------ -void 
RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, - RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx) +void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, + int64_t colAux, uint64_t& funcColsIdx) { static_any::any valOut; // Get the user data - boost::shared_ptr userData = rowIn.getUserData(colIn + 1); + boost::shared_ptr userDataIn = rowIn.getUserData(colIn+1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. uint32_t flags[1]; flags[0] = 0; - if (!userData) + if (!userDataIn) { if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { @@ -4230,11 +4309,12 @@ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, // Call the UDAF subEvaluate method mcsv1sdk::mcsv1_UDAF::ReturnCode rc; - rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userData.get()); + rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userDataIn.get()); fRGContext.setUserData(NULL); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) { + RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[funcColsIdx].get()); rowUDAF->bInterrupted = true; throw logging::IDBExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } @@ -4429,17 +4509,7 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn) case ROWAGG_UDAF: { - RowUDAFFunctionCol* rowUDAF = dynamic_cast(fFunctionCols[i].get()); - - if (rowUDAF) - { - doUDAF(rowIn, colIn, colOut, colAux, rowUDAF, i); - } - else - { - throw logic_error("(7)A UDAF function is called but there's no RowUDAFFunctionCol"); - } - + doUDAF(rowIn, colIn, colOut, colAux, i); break; } From ea70806e93f19772edd65bc0a6a2164fec5e98ea Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 25 May 2018 12:56:29 -0500 Subject: [PATCH 037/123] MCOL-1201 Add support for UDAF multiple parm constants --- dbcon/execplan/constantcolumn.h | 2 + dbcon/joblist/jlf_common.cpp | 2 +- 
dbcon/joblist/joblistfactory.cpp | 10 +- dbcon/joblist/windowfunctionstep.cpp | 9 +- .../primproc/batchprimitiveprocessor.cpp | 16 +- utils/common/any.hpp | 7 +- utils/loggingcpp/errorcodes.cpp | 2 +- utils/messageqcpp/bytestream.h | 1 + utils/rowgroup/rowaggregation.h | 36 ++- utils/udfsdk/allnull.h | 1 - utils/udfsdk/avg_mode.h | 1 - utils/udfsdk/avgx.h | 1 - utils/udfsdk/mcsv1_udaf.h | 1 - utils/udfsdk/median.h | 1 - utils/udfsdk/regr_avgx.cpp | 6 +- utils/udfsdk/regr_avgx.h | 1 - utils/udfsdk/ssq.h | 1 - utils/udfsdk/udfsdk.vpj | 33 --- utils/windowfunction/wf_udaf.cpp | 276 +++++++++++------- utils/windowfunction/wf_udaf.h | 2 - utils/windowfunction/windowfunctiontype.cpp | 24 +- utils/windowfunction/windowfunctiontype.h | 7 +- 22 files changed, 265 insertions(+), 175 deletions(-) diff --git a/dbcon/execplan/constantcolumn.h b/dbcon/execplan/constantcolumn.h index 04098faae..be0731044 100644 --- a/dbcon/execplan/constantcolumn.h +++ b/dbcon/execplan/constantcolumn.h @@ -38,6 +38,8 @@ class ByteStream; */ namespace execplan { +class ConstantColumn; + /** * @brief A class to represent a constant return column * diff --git a/dbcon/joblist/jlf_common.cpp b/dbcon/joblist/jlf_common.cpp index f5dbeee17..4b1980d49 100644 --- a/dbcon/joblist/jlf_common.cpp +++ b/dbcon/joblist/jlf_common.cpp @@ -405,7 +405,7 @@ uint32_t getTupleKey(JobInfo& jobInfo, const SRCP& srcp, bool add) if (add) { - // setTupleInfo first if add is ture, ok if already set. + // setTupleInfo first if add is true, ok if already set. 
const SimpleColumn* sc = dynamic_cast(srcp.get()); if (sc != NULL) diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 4cf7bccc5..033bf2643 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -300,6 +300,7 @@ const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo) { const ArithmeticColumn* ac = NULL; const FunctionColumn* fc = NULL; + const ConstantColumn* cc = NULL; uint64_t eid = -1; CalpontSystemCatalog::ColType ct; ExpressionStep* es = new ExpressionStep(jobInfo); @@ -316,6 +317,11 @@ const JobStepVector doProject(const RetColsVector& retCols, JobInfo& jobInfo) eid = fc->expressionId(); ct = fc->resultType(); } + else if ((cc = dynamic_cast(retCols[i].get())) != NULL) + { + eid = cc->expressionId(); + ct = cc->resultType(); + } else { std::ostringstream errmsg; @@ -1004,7 +1010,9 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo for (uint32_t parm = 0; parm < aggParms.size(); ++parm) { - if (aggc->constCol().get() != NULL) + // Only do the optimization of converting to count(*) if + // there is only one parameter. 
+ if (aggParms.size() == 1 && aggc->constCol().get() != NULL) { // replace the aggregate on constant with a count(*) SRCP clone; diff --git a/dbcon/joblist/windowfunctionstep.cpp b/dbcon/joblist/windowfunctionstep.cpp index 4d24f0b4b..2a93f680b 100644 --- a/dbcon/joblist/windowfunctionstep.cpp +++ b/dbcon/joblist/windowfunctionstep.cpp @@ -569,6 +569,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) for (RetColsVector::iterator i = jobInfo.windowCols.begin(); i < jobInfo.windowCols.end(); i++) { + bool isUDAF = false; // window function type WindowFunctionColumn* wc = dynamic_cast(i->get()); uint64_t ridx = getColumnIndex(*i, colIndexMap, jobInfo); // result index @@ -590,6 +591,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // if (boost::iequals(wc->functionName(),"UDAF_FUNC") if (wc->functionName() == "UDAF_FUNC") { + isUDAF = true; ++wfsUserFunctionCount; } @@ -646,10 +648,13 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // column type for functor templates int ct = 0; + if (isUDAF) + { + ct = wc->getUDAFContext().getResultType(); + } // make sure index is in range - if (fields.size() > 1 && fields[1] >= 0 && static_cast(fields[1]) < types.size()) + else if (fields.size() > 1 && fields[1] >= 0 && static_cast(fields[1]) < types.size()) ct = types[fields[1]]; - // workaround for functions using "within group (order by)" syntax string fn = boost::to_upper_copy(wc->functionName()); diff --git a/primitives/primproc/batchprimitiveprocessor.cpp b/primitives/primproc/batchprimitiveprocessor.cpp index bc56a7430..019761d39 100644 --- a/primitives/primproc/batchprimitiveprocessor.cpp +++ b/primitives/primproc/batchprimitiveprocessor.cpp @@ -1677,15 +1677,11 @@ void BatchPrimitiveProcessor::execute() } catch (logging::QueryDataExcept& qex) { - ostringstream os; - os << qex.what() << endl; - writeErrorMsg(os.str(), qex.errorCode()); + writeErrorMsg(qex.what(), qex.errorCode()); } 
catch (logging::DictionaryBufferOverflow& db) { - ostringstream os; - os << db.what() << endl; - writeErrorMsg(os.str(), db.errorCode()); + writeErrorMsg(db.what(), db.errorCode()); } catch (scalar_exception& se) { @@ -1758,15 +1754,11 @@ void BatchPrimitiveProcessor::execute() } catch (IDBExcept& iex) { - ostringstream os; - os << iex.what() << endl; - writeErrorMsg(os.str(), iex.errorCode(), true, false); + writeErrorMsg(iex.what(), iex.errorCode(), true, false); } catch (const std::exception& ex) { - ostringstream os; - os << ex.what() << endl; - writeErrorMsg(os.str(), logging::batchPrimitiveProcessorErr); + writeErrorMsg(ex.what(), logging::batchPrimitiveProcessorErr); } catch (...) { diff --git a/utils/common/any.hpp b/utils/common/any.hpp index 5408c5c87..63d05d3d2 100755 --- a/utils/common/any.hpp +++ b/utils/common/any.hpp @@ -11,15 +11,12 @@ #include #include +#include namespace static_any { namespace anyimpl { - struct bad_any_cast - { - }; - struct empty_any { }; @@ -266,7 +263,7 @@ public: T& cast() { if (policy != anyimpl::get_policy()) - throw anyimpl::bad_any_cast(); + throw std::runtime_error("static_any: type mismatch in cast"); T* r = reinterpret_cast(policy->get_value(&object)); return *r; } diff --git a/utils/loggingcpp/errorcodes.cpp b/utils/loggingcpp/errorcodes.cpp index 60919c906..4b4196800 100644 --- a/utils/loggingcpp/errorcodes.cpp +++ b/utils/loggingcpp/errorcodes.cpp @@ -29,7 +29,7 @@ using namespace std; namespace logging { -ErrorCodes::ErrorCodes(): fErrorCodes(), fPreamble("An unexpected condition within the query caused an internal processing error within InfiniDB. Please check the log files for more details. Additional Information: ") +ErrorCodes::ErrorCodes(): fErrorCodes(), fPreamble("An unexpected condition within the query caused an internal processing error within Columnstore. Please check the log files for more details. 
Additional Information: ") { fErrorCodes[batchPrimitiveStepErr] = "error in BatchPrimitiveStep."; fErrorCodes[tupleBPSErr] = "error in TupleBPS."; diff --git a/utils/messageqcpp/bytestream.h b/utils/messageqcpp/bytestream.h index d1a3f4988..f8453843e 100644 --- a/utils/messageqcpp/bytestream.h +++ b/utils/messageqcpp/bytestream.h @@ -35,6 +35,7 @@ #include "exceptclasses.h" #include "serializeable.h" +#include "any.hpp" class ByteStreamTestSuite; diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index 282f354fc..14e4313cf 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -50,6 +50,7 @@ #include "stlpoolallocator.h" #include "returnedcolumn.h" #include "mcsv1_udaf.h" +#include "constantcolumn.h" // To do: move code that depends on joblist to a proper subsystem. namespace joblist @@ -200,6 +201,13 @@ struct RowAggFunctionCol // 4. for duplicate - point to the real aggretate column to be copied from // Set only on UM, the fAuxColumnIndex is defaulted to fOutputColumnIndex+1 on PM. uint32_t fAuxColumnIndex; + + // For UDAF that have more than one parameter and some parameters are constant. + // There will be a series of RowAggFunctionCol created, one for each parameter. + // The first will be a RowUDAFFunctionCol. Subsequent ones will be RowAggFunctionCol + // with fAggFunction == ROWAGG_MULTI_PARM. Order is important. + // If this parameter is constant, that value is here. 
+ SRCP fpConstCol; }; @@ -220,8 +228,11 @@ struct RowUDAFFunctionCol : public RowAggFunctionCol inputColIndex, outputColIndex, auxColIndex), bInterrupted(false) {} - RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, - rhs.fInputColumnIndex, rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), fUDAFContext(rhs.fUDAFContext) + RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : + RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, rhs.fInputColumnIndex, + rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), + fUDAFContext(rhs.fUDAFContext), + bInterrupted(false) {} virtual ~RowUDAFFunctionCol() {} @@ -238,6 +249,16 @@ inline void RowAggFunctionCol::serialize(messageqcpp::ByteStream& bs) const bs << (uint8_t)fAggFunction; bs << fInputColumnIndex; bs << fOutputColumnIndex; + if (fpConstCol) + { + bs << (uint8_t)1; + fpConstCol.get()->serialize(bs); + } + else + { + bs << (uint8_t)0; + } + } inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) @@ -245,6 +266,13 @@ inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) bs >> (uint8_t&)fAggFunction; bs >> fInputColumnIndex; bs >> fOutputColumnIndex; + uint8_t t; + bs >> t; + if (t) + { + fpConstCol.reset(new ConstantColumn); + fpConstCol.get()->unserialize(bs); + } } inline void RowUDAFFunctionCol::serialize(messageqcpp::ByteStream& bs) const @@ -586,7 +614,7 @@ protected: virtual void doAvg(const Row&, int64_t, int64_t, int64_t); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); virtual void doBitOp(const Row&, int64_t, int64_t, int); - virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); + virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx); virtual bool countSpecial(const RowGroup* pRG) { fRow.setIntField<8>(fRow.getIntField<8>(0) + pRG->getRowCount(), 0); @@ -902,7 +930,7 @@ protected: void doStatistics(const Row&, int64_t, int64_t, 
int64_t); void doGroupConcat(const Row&, int64_t, int64_t); void doBitOp(const Row&, int64_t, int64_t, int); - void doUDAF(const Row&, int64_t, int64_t, int64_t, RowUDAFFunctionCol* rowUDAF, uint64_t& funcColsIdx); + void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx); bool countSpecial(const RowGroup* pRG) { return false; diff --git a/utils/udfsdk/allnull.h b/utils/udfsdk/allnull.h index da17f5d6b..6a727caf6 100644 --- a/utils/udfsdk/allnull.h +++ b/utils/udfsdk/allnull.h @@ -48,7 +48,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/avg_mode.h b/utils/udfsdk/avg_mode.h index 5722c5fea..fba1fcdcc 100644 --- a/utils/udfsdk/avg_mode.h +++ b/utils/udfsdk/avg_mode.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/avgx.h b/utils/udfsdk/avgx.h index 0569b6091..a830c6803 100644 --- a/utils/udfsdk/avgx.h +++ b/utils/udfsdk/avgx.h @@ -35,7 +35,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h index df3f47649..e09228d77 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -68,7 +68,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/median.h b/utils/udfsdk/median.h index 142be6ba8..48bd93c70 100644 --- a/utils/udfsdk/median.h +++ b/utils/udfsdk/median.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/udfsdk/regr_avgx.cpp index c7cc5b56e..aec4f361f 100644 --- a/utils/udfsdk/regr_avgx.cpp +++ b/utils/udfsdk/regr_avgx.cpp @@ -82,7 +82,7 @@ mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* { return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
} - if (valIn_x.empty() || valIn_y.empty()) + if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. Probably redundant { return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. } @@ -107,10 +107,6 @@ mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* { val = valIn_x.cast(); } - else if (valIn_x.compatible(longTypeId)) - { - val = valIn_x.cast(); - } else if (valIn_x.compatible(llTypeId)) { val = valIn_x.cast(); diff --git a/utils/udfsdk/regr_avgx.h b/utils/udfsdk/regr_avgx.h index f70f30d8c..27b8708f7 100644 --- a/utils/udfsdk/regr_avgx.h +++ b/utils/udfsdk/regr_avgx.h @@ -35,7 +35,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/ssq.h b/utils/udfsdk/ssq.h index 2cac61c2c..e27ecf1fa 100644 --- a/utils/udfsdk/ssq.h +++ b/utils/udfsdk/ssq.h @@ -56,7 +56,6 @@ #include #include #include -#include #ifdef _MSC_VER #include #else diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index 3d3ac39ca..fe1f3fd0e 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -238,38 +238,5 @@ N="Makefile" Type="Makefile"/> - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index 5cd5243c5..2876fbf7e 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -451,7 +451,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { mcsv1sdk::mcsv1_UDAF::ReturnCode rc; uint64_t colOut = fFieldIndex[0]; - + bool isNull = false; if ((fFrameUnit == WF__FRAME_ROWS) || (fPrev == -1) || (!fPeer->operator()(getPointer(fRowData->at(c)), getPointer(fRowData->at(fPrev))))) @@ -468,13 +468,24 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Put the parameter metadata (type, scale, precision) into valsIn mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + ConstantColumn* cc = NULL; for (uint32_t i = 0; i < 
getContext().getParameterCount(); ++i) { - uint64_t colIn = fFieldIndex[i+1]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colIn); + cc = static_cast(fConstantParms[i].get()); + if (cc) + { + datum.dataType = cc->resultType().colDataType; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + uint64_t colIn = fFieldIndex[i+1]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } } if (b <= c && c <= e) @@ -494,12 +505,14 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) uint32_t flags[getContext().getParameterCount()]; for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { + cc = static_cast(fConstantParms[k].get()); uint64_t colIn = fFieldIndex[k+1]; mcsv1sdk::ColumnDatum& datum = valsIn[k]; // Turn on Null flags or skip based on respect nulls flags[k] = 0; - if (fRow.isNullValue(colIn) == true) + if ((!cc && fRow.isNullValue(colIn) == true) + || (cc && cc->type() == ConstantColumn::NULLDATA)) { if (!bRespectNulls) { @@ -510,133 +523,196 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - // MCOL-1201 Multi-Paramter calls - switch (datum.dataType) + if (!bHasNull && !(flags[k] & mcsv1sdk::PARAM_IS_NULL)) { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: + switch (datum.dataType) { - int64_t valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. 
- if (k == 0) + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + int64_t valIn; + if (cc) { - continue; + valIn = cc->getIntVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - case CalpontSystemCatalog::UDECIMAL: - { - uint64_t valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + int64_t valIn; + if (cc) { - continue; + valIn = cc->getDecimalVal(fRow, isNull).value; } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - { - double valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. 
- // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + uint64_t valIn; + if (cc) { - continue; + valIn = cc->getUintVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - { - float valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + double valIn; + if (cc) { - continue; + valIn = cc->getDoubleVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::VARBINARY: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::BLOB: - { - string valIn; - getValue(colIn, valIn); - // Check for distinct, if turned on. - // Currently, distinct only works on the first parameter. - if (k == 0) + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + float valIn; + if (cc) { - continue; + valIn = cc->getFloatVal(fRow, isNull); } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - if (fDistinct) - fDistinctSet.insert(valIn); + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; } - datum.columnData = valIn; - break; - } - default: - { - string errStr = "(" + colType2String[i] + ")"; - errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); - cerr << errStr << endl; - throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + if (cc) + { + valIn = cc->getStrVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + { + continue; + } - break; + if (fDistinct) + fDistinctSet.insert(valIn); + } + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[i] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } } } // Skip if any value is NULL and respect nulls is off. diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index f7a4c4b08..fc3f9006d 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -53,8 +53,6 @@ public: // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF -// The template parameter is currently only used to support DISTINCT, as -// as that is done via a set template class WF_udaf : public WindowFunctionType { diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index 4c5b4de32..f5598a7e5 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -39,7 +39,6 @@ using namespace logging; using namespace ordering; #include "calpontsystemcatalog.h" -#include "constantcolumn.h" #include "dataconvert.h" // int64_t IDB_pow[19] using namespace execplan; @@ -228,6 +227,9 @@ WindowFunctionType::makeWindowFunction(const string& name, int ct, WindowFunctio break; } + // Copy the only the constant parameter pointers + af->constParms(wc->functionParms()); + return af; } @@ -634,6 +636,26 @@ void* WindowFunctionType::getNullValueByType(int ct, int pos) return v; } +void WindowFunctionType::constParms(const std::vector& functionParms) +{ + // fConstantParms will end up with a copy of functionParms, but only + // the constant types will be copied. 
Other types will take up space but + // be NULL. This allows us to acces the constants without the overhead + // of dynamic_cast for every row. + for (size_t i = 0; i < functionParms.size(); ++i) + { + ConstantColumn* cc = dynamic_cast(functionParms[i].get()); + if (cc) + { + fConstantParms.push_back(functionParms[i]); + } + else + { + fConstantParms.push_back(SRCP(cc)); + } + } +} + } //namespace // vim:ts=4 sw=4: diff --git a/utils/windowfunction/windowfunctiontype.h b/utils/windowfunction/windowfunctiontype.h index 50732d3b5..efa1c548a 100644 --- a/utils/windowfunction/windowfunctiontype.h +++ b/utils/windowfunction/windowfunctiontype.h @@ -31,7 +31,7 @@ #include "returnedcolumn.h" #include "rowgroup.h" #include "windowframe.h" - +#include "constantcolumn.h" namespace ordering { @@ -198,6 +198,8 @@ public: fStep = step; } + void constParms(const std::vector& functionParms); + static boost::shared_ptr makeWindowFunction(const std::string&, int ct, WindowFunctionColumn* wc); protected: @@ -244,6 +246,9 @@ protected: // output and input field indices: [0] - output std::vector fFieldIndex; + // constant function parameters -- needed for udaf with constant + std::vector fConstantParms; + // row meta data rowgroup::RowGroup fRowGroup; rowgroup::Row fRow; From 05f1752dd05b5f1483f2f1fa3db453cf01a401a3 Mon Sep 17 00:00:00 2001 From: david hill Date: Tue, 5 Jun 2018 15:47:38 -0500 Subject: [PATCH 038/123] MCOL-1405 - fix launch of mysql monitor thread on seperate module install --- procmon/main.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/procmon/main.cpp b/procmon/main.cpp index 2f98bc1e7..b4e23a6e1 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -696,10 +696,10 @@ int main(int argc, char **argv) log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR); //mysqld status monitor thread - if ( ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) || - (PMwithUM == "y") ) + if ( 
config.moduleType() == "um" || + ( config.moduleType() == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) || + ( config.moduleType() == "pm" && PMwithUM == "y") ) { - pthread_t mysqlThread; ret = pthread_create (&mysqlThread, NULL, (void*(*)(void*)) &mysqlMonitorThread, NULL); if ( ret != 0 ) From 2bbb70f61b9a1642c9c1f5ce715b07005cc234ff Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Wed, 6 Jun 2018 16:18:54 +0100 Subject: [PATCH 039/123] MCOL-1408 Multiple API HWM boundary fixes Fixes the following: * Generate error if calculateRowId fails * No data written when first extent is completely full on a write, all data going to second extent. * 0 byte valArray malloc * valArray free() on no malloc * Column touched but no data written if all data going to second extent * Wrong colWidth used on second extent calculateRowId * Out of bounds memory write (crash) when no data for first extent * Extent not committed if all data going to second extent --- writeengine/wrapper/writeengine.cpp | 244 ++++++++++++++-------------- 1 file changed, 126 insertions(+), 118 deletions(-) diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 7cb3ca85e..afea06fee 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -2008,7 +2008,6 @@ timer.stop("tokenize"); if (it != aColExtsInfo.end()) //update hwm info { oldHwm = it->hwm; - } // save hwm for the old extent colWidth = colStructList[i].colWidth; @@ -2032,6 +2031,7 @@ timer.stop("tokenize"); else return ERR_INVALID_PARAM; + } //update hwm for the new extent if (newExtent) { @@ -2043,6 +2043,7 @@ timer.stop("tokenize"); break; it++; } + colWidth = newColStructList[i].colWidth; succFlag = colOp->calculateRowId(lastRidNew, BYTE_PER_BLOCK/colWidth, colWidth, curFbo, curBio); if (succFlag) { @@ -2107,6 +2108,9 @@ timer.start("writeColumnRec"); curFbo)); } } + else + return ERR_INVALID_PARAM; + } } // If we create a new extent for this 
batch for (unsigned i = 0; i < newColStructList.size(); i++) @@ -2123,7 +2127,8 @@ timer.start("writeColumnRec"); curFbo)); } } - } + else + return ERR_INVALID_PARAM; } if (lbids.size() > 0) @@ -4604,7 +4609,7 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, bool versioning) { int rc = 0; - void* valArray; + void* valArray = NULL; string segFile; Column curCol; ColStructList::size_type totalColumn; @@ -4629,132 +4634,135 @@ StopWatch timer; totalRow2 = 0; } - valArray = malloc(sizeof(uint64_t) * totalRow1); - - if (totalRow1 == 0) + // It is possible totalRow1 is zero but totalRow2 has values + if ((totalRow1 == 0) && (totalRow2 == 0)) return rc; TableMetaData* aTbaleMetaData = TableMetaData::makeTableMetaData(tableOid); - for (i = 0; i < totalColumn; i++) + if (totalRow1) { - //@Bug 2205 Check if all rows go to the new extent - //Write the first batch - RID * firstPart = rowIdArray; - ColumnOp* colOp = m_colOp[op(colStructList[i].fCompressionType)]; - - // set params - colOp->initColumn(curCol); - // need to pass real dbRoot, partition, and segment to setColParam - colOp->setColParam(curCol, 0, colStructList[i].colWidth, - colStructList[i].colDataType, colStructList[i].colType, colStructList[i].dataOid, - colStructList[i].fCompressionType, colStructList[i].fColDbRoot, - colStructList[i].fColPartition, colStructList[i].fColSegment); - - ColExtsInfo aColExtsInfo = aTbaleMetaData->getColExtsInfo(colStructList[i].dataOid); - ColExtsInfo::iterator it = aColExtsInfo.begin(); - while (it != aColExtsInfo.end()) + valArray = malloc(sizeof(uint64_t) * totalRow1); + for (i = 0; i < totalColumn; i++) { - if ((it->dbRoot == colStructList[i].fColDbRoot) && (it->partNum == colStructList[i].fColPartition) && (it->segNum == colStructList[i].fColSegment)) - break; - it++; - } + //@Bug 2205 Check if all rows go to the new extent + //Write the first batch + RID * firstPart = rowIdArray; + ColumnOp* colOp = m_colOp[op(colStructList[i].fCompressionType)]; - if (it == 
aColExtsInfo.end()) //add this one to the list - { - ColExtInfo aExt; - aExt.dbRoot =colStructList[i].fColDbRoot; - aExt.partNum = colStructList[i].fColPartition; - aExt.segNum = colStructList[i].fColSegment; - aExt.compType = colStructList[i].fCompressionType; - aColExtsInfo.push_back(aExt); - aTbaleMetaData->setColExtsInfo(colStructList[i].dataOid, aColExtsInfo); - } + // set params + colOp->initColumn(curCol); + // need to pass real dbRoot, partition, and segment to setColParam + colOp->setColParam(curCol, 0, colStructList[i].colWidth, + colStructList[i].colDataType, colStructList[i].colType, colStructList[i].dataOid, + colStructList[i].fCompressionType, colStructList[i].fColDbRoot, + colStructList[i].fColPartition, colStructList[i].fColSegment); - rc = colOp->openColumnFile(curCol, segFile, useTmpSuffix, IO_BUFF_SIZE); // @bug 5572 HDFS tmp file - if (rc != NO_ERROR) - break; - - // handling versioning - vector rangeList; - if (versioning) - { - rc = processVersionBuffer(curCol.dataFile.pFile, txnid, colStructList[i], - colStructList[i].colWidth, totalRow1, firstPart, rangeList); - if (rc != NO_ERROR) { - if (colStructList[i].fCompressionType == 0) - { - curCol.dataFile.pFile->flush(); - } - - BRMWrapper::getInstance()->writeVBEnd(txnid, rangeList); - break; - } - } - - //totalRow1 -= totalRow2; - // have to init the size here - // nullArray = (bool*) malloc(sizeof(bool) * totalRow); - uint8_t tmp8; - uint16_t tmp16; - uint32_t tmp32; - for (size_t j = 0; j < totalRow1; j++) - { - uint64_t curValue = colValueList[((totalRow1 + totalRow2)*i) + j]; - switch (colStructList[i].colType) + ColExtsInfo aColExtsInfo = aTbaleMetaData->getColExtsInfo(colStructList[i].dataOid); + ColExtsInfo::iterator it = aColExtsInfo.begin(); + while (it != aColExtsInfo.end()) { - case WriteEngine::WR_VARBINARY : // treat same as char for now - case WriteEngine::WR_CHAR: - case WriteEngine::WR_BLOB: - case WriteEngine::WR_TEXT: - ((uint64_t*)valArray)[j] = curValue; - break; - case 
WriteEngine::WR_INT: - case WriteEngine::WR_UINT: - case WriteEngine::WR_FLOAT: - tmp32 = curValue; - ((uint32_t*)valArray)[j] = tmp32; - break; - case WriteEngine::WR_ULONGLONG: - case WriteEngine::WR_LONGLONG: - case WriteEngine::WR_DOUBLE: - case WriteEngine::WR_TOKEN: - ((uint64_t*)valArray)[j] = curValue; - break; - case WriteEngine::WR_BYTE: - case WriteEngine::WR_UBYTE: - tmp8 = curValue; - ((uint8_t*)valArray)[j] = tmp8; - break; - case WriteEngine::WR_SHORT: - case WriteEngine::WR_USHORT: - tmp16 = curValue; - ((uint16_t*)valArray)[j] = tmp16; + if ((it->dbRoot == colStructList[i].fColDbRoot) && (it->partNum == colStructList[i].fColPartition) && (it->segNum == colStructList[i].fColSegment)) break; + it++; } + + if (it == aColExtsInfo.end()) //add this one to the list + { + ColExtInfo aExt; + aExt.dbRoot =colStructList[i].fColDbRoot; + aExt.partNum = colStructList[i].fColPartition; + aExt.segNum = colStructList[i].fColSegment; + aExt.compType = colStructList[i].fCompressionType; + aColExtsInfo.push_back(aExt); + aTbaleMetaData->setColExtsInfo(colStructList[i].dataOid, aColExtsInfo); + } + + rc = colOp->openColumnFile(curCol, segFile, useTmpSuffix, IO_BUFF_SIZE); // @bug 5572 HDFS tmp file + if (rc != NO_ERROR) + break; + + // handling versioning + vector rangeList; + if (versioning) + { + rc = processVersionBuffer(curCol.dataFile.pFile, txnid, colStructList[i], + colStructList[i].colWidth, totalRow1, firstPart, rangeList); + if (rc != NO_ERROR) { + if (colStructList[i].fCompressionType == 0) + { + curCol.dataFile.pFile->flush(); + } + + BRMWrapper::getInstance()->writeVBEnd(txnid, rangeList); + break; + } + } + + //totalRow1 -= totalRow2; + // have to init the size here + // nullArray = (bool*) malloc(sizeof(bool) * totalRow); + uint8_t tmp8; + uint16_t tmp16; + uint32_t tmp32; + for (size_t j = 0; j < totalRow1; j++) + { + uint64_t curValue = colValueList[((totalRow1 + totalRow2)*i) + j]; + switch (colStructList[i].colType) + { + case 
WriteEngine::WR_VARBINARY : // treat same as char for now + case WriteEngine::WR_CHAR: + case WriteEngine::WR_BLOB: + case WriteEngine::WR_TEXT: + ((uint64_t*)valArray)[j] = curValue; + break; + case WriteEngine::WR_INT: + case WriteEngine::WR_UINT: + case WriteEngine::WR_FLOAT: + tmp32 = curValue; + ((uint32_t*)valArray)[j] = tmp32; + break; + case WriteEngine::WR_ULONGLONG: + case WriteEngine::WR_LONGLONG: + case WriteEngine::WR_DOUBLE: + case WriteEngine::WR_TOKEN: + ((uint64_t*)valArray)[j] = curValue; + break; + case WriteEngine::WR_BYTE: + case WriteEngine::WR_UBYTE: + tmp8 = curValue; + ((uint8_t*)valArray)[j] = tmp8; + break; + case WriteEngine::WR_SHORT: + case WriteEngine::WR_USHORT: + tmp16 = curValue; + ((uint16_t*)valArray)[j] = tmp16; + break; + } + } + + +#ifdef PROFILE + timer.start("writeRow "); +#endif + rc = colOp->writeRow(curCol, totalRow1, firstPart, valArray); +#ifdef PROFILE + timer.stop("writeRow "); +#endif + colOp->closeColumnFile(curCol); + + if (versioning) + BRMWrapper::getInstance()->writeVBEnd(txnid, rangeList); + + // check error + if (rc != NO_ERROR) + break; + + } // end of for (i = 0 + if (valArray != NULL) + { + free(valArray); + valArray = NULL; } - - -#ifdef PROFILE -timer.start("writeRow "); -#endif - rc = colOp->writeRow(curCol, totalRow1, firstPart, valArray); -#ifdef PROFILE -timer.stop("writeRow "); -#endif - colOp->closeColumnFile(curCol); - - if (versioning) - BRMWrapper::getInstance()->writeVBEnd(txnid, rangeList); - - // check error - if (rc != NO_ERROR) - break; - - } // end of for (i = 0 - if (valArray != NULL) - { - free(valArray); - valArray = NULL; } // MCOL-1176 - Write second extent From 5b3db71c8fd7a512abf22dae32991afcee5a1979 Mon Sep 17 00:00:00 2001 From: David Hall Date: Wed, 6 Jun 2018 10:34:35 -0500 Subject: [PATCH 040/123] MCOL-1201 fix some regressions --- dbcon/joblist/tupleaggregatestep.cpp | 52 +++++++++++++++++++++------- dbcon/mysql/ha_calpont_execplan.cpp | 26 +++++++++----- 2 files changed, 57 
insertions(+), 21 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index be0e2009d..0f981e68f 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -1505,10 +1505,17 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); // If the param is const - ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) + if (udafc) { - funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep1PhaseAggregate: UDAF multi function with no parms", aggregateFuncErr); } ++udafcParamIdx; } @@ -2123,10 +2130,17 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( multiParmIndexes.push_back(colAgg); ++colAgg; // If the param is const - ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) + if (udafc) { - funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep1PhaseDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); } ++udafcParamIdx; } @@ -3398,10 +3412,17 @@ void TupleAggregateStep::prep2PhasesAggregate( widthAggPm.push_back(width[colProj]); colAggPm++; // If the param is const - ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) + if (udafc) { - funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep2PhasesAggregate: UDAF multi function with 
no parms", aggregateFuncErr); } ++udafcParamIdx; } @@ -4257,10 +4278,17 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( multiParmIndexes.push_back(colAggPm); colAggPm++; // If the param is const - ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) + if (udafc) { - funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) + { + funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; + } + } + else + { + throw QueryDataExcept("prep2PhasesDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); } ++udafcParamIdx; } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index d030d1855..8df06c6b4 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4165,7 +4165,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) rowCol->columnVec(selCols); (dynamic_cast(ac))->orderCols(orderCols); parm.reset(rowCol); - ac->aggParms().push_back(parm); + ac->aggParms().push_back(parm); if (gc->str_separator()) { @@ -4311,15 +4311,15 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); } - if (ac) - delete ac; + if (ac) + delete ac; return NULL; } - if (parm) - { - // MCOL-1201 multi-argument aggregate - ac->aggParms().push_back(parm); - } + if (parm) + { + // MCOL-1201 multi-argument aggregate + ac->aggParms().push_back(parm); + } } } @@ -10033,7 +10033,15 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - (*coliter)->aggParms().push_back(minSc); + // Replace the last (presumably constant) object with minSc + if ((*coliter)->aggParms().empty()) + { + (*coliter)->aggParms().push_back(minSc); + } + else + { + (*coliter)->aggParms()[0] = minSc; + } } std::vector::iterator funciter; 
From 2b77f0f284135f9dad93acb0ef609fec2d1ec34f Mon Sep 17 00:00:00 2001 From: David Hall Date: Wed, 6 Jun 2018 16:51:47 -0500 Subject: [PATCH 041/123] MCOL-1201 Handle NULLs in Window functions correctly. --- utils/windowfunction/wf_udaf.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index 2876fbf7e..ee48360f1 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -503,6 +503,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // NULL flags uint32_t flags[getContext().getParameterCount()]; + bHasNull = false; for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { cc = static_cast(fConstantParms[k].get()); @@ -715,11 +716,11 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) } } } - // Skip if any value is NULL and respect nulls is off. - if (bHasNull) - { - continue; - } + } + // Skip if any value is NULL and respect nulls is off. 
+ if (bHasNull) + { + continue; } getContext().setDataFlags(flags); From 09269af6d89de4d5d30c12f26c5d83742447b0a4 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 7 Jun 2018 12:16:55 +0100 Subject: [PATCH 042/123] MCOL-1417 Fix cpimport TIME saturation --- utils/dataconvert/dataconvert.cpp | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/utils/dataconvert/dataconvert.cpp b/utils/dataconvert/dataconvert.cpp index 6c66a07de..1a2e90a88 100644 --- a/utils/dataconvert/dataconvert.cpp +++ b/utils/dataconvert/dataconvert.cpp @@ -1907,6 +1907,7 @@ int64_t DataConvert::convertColumnTime( { status = 0; char* p; + char* retp = NULL; char* savePoint = NULL; p = const_cast(dataOrg); int64_t value = 0; @@ -1923,6 +1924,17 @@ int64_t DataConvert::convertColumnTime( return value; } + if (dataOrgLen == 0) + { + return value; + } + if (dataOrgLen < 3) + { + // Not enough chars to be a time + status = -1; + return value; + } + if (p[0] == '-') { isNeg = true; @@ -1931,9 +1943,9 @@ int64_t DataConvert::convertColumnTime( errno = 0; p = strtok_r(p, ":.", &savePoint); - inHour = strtol(p, 0, 10); + inHour = strtol(p, &retp, 10); - if (errno) + if (errno || !retp) { status = -1; return value; @@ -1947,9 +1959,9 @@ int64_t DataConvert::convertColumnTime( return value; } - inMinute = strtol(p, 0, 10); + inMinute = strtol(p, &retp, 10); - if (errno) + if (errno || !retp) { status = -1; return value; @@ -1963,9 +1975,9 @@ int64_t DataConvert::convertColumnTime( return value; } - inSecond = strtol(p, 0, 10); + inSecond = strtol(p, &retp, 10); - if (errno) + if (errno || !retp) { status = -1; return value; @@ -1975,9 +1987,9 @@ int64_t DataConvert::convertColumnTime( if (p != NULL) { - inMicrosecond = strtol(p, 0, 10); + inMicrosecond = strtol(p, &retp, 10); - if (errno) + if (errno || !retp) { status = -1; return value; From e2141e0b69ab82ba580faf2dcef1b0f5b673b32e Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 7 Jun 2018 
12:41:44 +0100 Subject: [PATCH 043/123] MCOL-1418 Fix negative time storage for LDI --- dbcon/mysql/ha_calpont_dml.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbcon/mysql/ha_calpont_dml.cpp b/dbcon/mysql/ha_calpont_dml.cpp index cf103a801..a9b64b757 100644 --- a/dbcon/mysql/ha_calpont_dml.cpp +++ b/dbcon/mysql/ha_calpont_dml.cpp @@ -895,6 +895,10 @@ int ha_calpont_impl_write_batch_row_(uchar* buf, TABLE* table, cal_impl_if::cal_ longlong tmp = my_time_packed_from_binary(pos, table->field[colpos]->decimals()); TIME_from_longlong_time_packed(<ime, tmp); + if (ltime.neg) + { + fprintf(ci.filePtr, "-"); + } if (!ltime.second_part) { fprintf(ci.filePtr, "%02d:%02d:%02d%c", From 58f10055aa2826ec072beeecfda9567d8da31fd2 Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 7 Jun 2018 10:05:35 -0500 Subject: [PATCH 044/123] MCOL-1201 Add regr_avgx to included functions --- dbcon/mysql/install_calpont_mysql.sh | 1 + utils/winport/win_setup_mysql_part4.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/dbcon/mysql/install_calpont_mysql.sh b/dbcon/mysql/install_calpont_mysql.sh index e8eb5b2b0..e04371549 100755 --- a/dbcon/mysql/install_calpont_mysql.sh +++ b/dbcon/mysql/install_calpont_mysql.sh @@ -84,6 +84,7 @@ CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.so'; CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemready RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemreadonly RETURNS INTEGER soname 'libcalmysql.so'; +CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libcalmysql.dll'; CREATE DATABASE IF NOT EXISTS infinidb_vtable; CREATE DATABASE IF NOT EXISTS infinidb_querystats; diff --git a/utils/winport/win_setup_mysql_part4.sql b/utils/winport/win_setup_mysql_part4.sql index 3b75fbe98..d884214ec 100644 --- a/utils/winport/win_setup_mysql_part4.sql +++ b/utils/winport/win_setup_mysql_part4.sql @@ -18,4 +18,5 @@ CREATE FUNCTION idbextentmin RETURNS STRING soname 
'libcalmysql.dll'; CREATE FUNCTION idbextentmax RETURNS STRING soname 'libcalmysql.dll'; CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.dll'; CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.dll'; +CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libcalmysql.dll'; From d9e6ba90ad345296a479336920c10eec3e44e9c2 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 8 Jun 2018 14:58:08 +0100 Subject: [PATCH 045/123] MCOL-1433 Fix several functions for TIME handling This fixes hex() so that it outputs the hex of the ASCII for the time data to make it aligned with MariaDB. It also fixes the following functions so that they use NOW() as a DATETIME with the input TIME added to it: * weekday() * yearweek() * monthname() * last_day() * year() * weekofyear() * week() * to_days() * quarter() * month() * dayofyear() * dayofweek() * dayofmonth() * day() * date() --- utils/funcexp/func_add_time.cpp | 174 ----------------------------- utils/funcexp/func_date.cpp | 17 +++ utils/funcexp/func_day.cpp | 13 +++ utils/funcexp/func_dayname.cpp | 14 +++ utils/funcexp/func_dayofweek.cpp | 14 +++ utils/funcexp/func_dayofyear.cpp | 14 +++ utils/funcexp/func_hex.cpp | 1 + utils/funcexp/func_last_day.cpp | 13 +++ utils/funcexp/func_month.cpp | 11 ++ utils/funcexp/func_monthname.cpp | 12 ++ utils/funcexp/func_quarter.cpp | 11 ++ utils/funcexp/func_to_days.cpp | 18 +++ utils/funcexp/func_week.cpp | 13 +++ utils/funcexp/func_weekday.cpp | 13 +++ utils/funcexp/func_year.cpp | 11 ++ utils/funcexp/func_yearweek.cpp | 13 +++ utils/funcexp/functor.cpp | 185 +++++++++++++++++++++++++++++++ utils/funcexp/functor.h | 7 ++ 18 files changed, 380 insertions(+), 174 deletions(-) diff --git a/utils/funcexp/func_add_time.cpp b/utils/funcexp/func_add_time.cpp index c3d5d7d85..db854c085 100644 --- a/utils/funcexp/func_add_time.cpp +++ b/utils/funcexp/func_add_time.cpp @@ -36,180 +36,6 @@ using namespace dataconvert; #include "functor_dtm.h" #include "funchelpers.h" 
-namespace -{ -using namespace funcexp; - -int64_t addTime(DateTime& dt1, Time& dt2) -{ - DateTime dt; - dt.year = 0; - dt.month = 0; - dt.day = 0; - dt.hour = 0; - dt.minute = 0; - dt.second = 0; - dt.msecond = 0; - - int64_t month, day, hour, min, sec, msec, tmp; - msec = (signed)(dt1.msecond + dt2.msecond); - dt.msecond = tmp = msec % 1000000; - - if (tmp < 0) - { - dt.msecond = tmp + 1000000; - dt2.second--; - } - - sec = (signed)(dt1.second + dt2.second + msec / 1000000); - dt.second = tmp = sec % 60; - - if (tmp < 0) - { - dt.second = tmp + 60; - dt2.minute--; - } - - min = (signed)(dt1.minute + dt2.minute + sec / 60); - dt.minute = tmp = min % 60; - - if (tmp < 0) - { - dt.minute = tmp + 60; - dt2.hour--; - } - - hour = (signed)(dt1.hour + dt2.hour + min / 60); - - if ((hour < 0) || (hour > 23)) - { - dt2.day = hour / 24; - hour = hour % 24; - } - - if (hour < 0) - { - dt.hour = hour + 24; - dt2.day--; - } - else - { - dt.hour = hour; - } - - day = (signed)(dt1.day + dt2.day); - - - if (isLeapYear(dt1.year) && dt1.month == 2) - day--; - - month = dt1.month; - int addyear = 0; - - if (day < 0) - { - int monthSave = month; - - while (day <= 0) - { - month = (month == 1 ? 
12 : month - 1); - - for (; day <= 0 && month > 0; month--) - day += getDaysInMonth(month, dt1.year); - - month++; -// month=12; - } - - if ( month > monthSave ) - addyear--; - } - else - { - int monthSave = month; - - while (day > getDaysInMonth(month, dt1.year)) - { - for (; day > getDaysInMonth(month, dt1.year) && month <= 12; month++) - day -= getDaysInMonth(month, dt1.year); - - if (month > 12) - month = 1; - } - - if ( month < monthSave ) - addyear++; - } - - dt.day = day; - dt.month = month; - dt.year = dt1.year + addyear; - - return *(reinterpret_cast(&dt)); -} - -int64_t addTime(Time& dt1, Time& dt2) -{ - Time dt; - dt.is_neg = false; - dt.hour = 0; - dt.minute = 0; - dt.second = 0; - dt.msecond = 0; - - int64_t min, sec, msec, tmp; - msec = (signed)(dt1.msecond + dt2.msecond); - dt.msecond = tmp = msec % 1000000; - - if (tmp < 0) - { - dt.msecond = tmp + 1000000; - dt2.second--; - } - - sec = (signed)(dt1.second + dt2.second + msec / 1000000); - dt.second = tmp = sec % 60; - - if (tmp < 0) - { - dt.second = tmp + 60; - dt2.minute--; - } - - min = (signed)(dt1.minute + dt2.minute + sec / 60); - dt.minute = tmp = min % 60; - - if (tmp < 0) - { - dt.minute = tmp + 60; - dt2.hour--; - } - - dt.hour = tmp = (signed)(dt1.hour + dt2.hour + min / 60); - - // Saturation - if (tmp > 838) - { - dt.hour = 838; - dt.minute = 59; - dt.second = 59; - dt.msecond = 999999; - } - else if (tmp < -838) - { - dt.is_neg = true; - dt.hour = -838; - dt.minute = 59; - dt.second = 59; - dt.msecond = 999999; - } - - return *(reinterpret_cast(&dt)); -} - - -} - namespace funcexp { diff --git a/utils/funcexp/func_date.cpp b/utils/funcexp/func_date.cpp index 5f700e446..9f875b374 100644 --- a/utils/funcexp/func_date.cpp +++ b/utils/funcexp/func_date.cpp @@ -56,6 +56,9 @@ int64_t Func_date::getIntVal(rowgroup::Row& row, string value = ""; + DateTime aDateTime; + Time aTime; + switch (type) { case execplan::CalpontSystemCatalog::DATE: @@ -72,6 +75,20 @@ int64_t 
Func_date::getIntVal(rowgroup::Row& row, break; } + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + { + int64_t val; + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + value = dataconvert::DataConvert::datetimeToString(val); + value = value.substr(0, 10); + break; + } + + case execplan::CalpontSystemCatalog::BIGINT: case execplan::CalpontSystemCatalog::INT: case execplan::CalpontSystemCatalog::MEDINT: diff --git a/utils/funcexp/func_day.cpp b/utils/funcexp/func_day.cpp index bbd5edfcb..a25a41bcf 100644 --- a/utils/funcexp/func_day.cpp +++ b/utils/funcexp/func_day.cpp @@ -49,6 +49,9 @@ int64_t Func_day::getIntVal(rowgroup::Row& row, { int64_t val = 0; + DateTime aDateTime; + Time aTime; + switch (parm[0]->data()->resultType().colDataType) { case CalpontSystemCatalog::DATE: @@ -59,6 +62,16 @@ int64_t Func_day::getIntVal(rowgroup::Row& row, val = parm[0]->data()->getIntVal(row, isNull); return (uint32_t)((val >> 38) & 0x3f); + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + return (uint32_t)((val >> 38) & 0x3f); + break; + + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_dayname.cpp b/utils/funcexp/func_dayname.cpp index 3825bc1a3..0122e4898 100644 --- a/utils/funcexp/func_dayname.cpp +++ b/utils/funcexp/func_dayname.cpp @@ -54,6 +54,9 @@ int64_t Func_dayname::getIntVal(rowgroup::Row& row, int64_t val = 0; int32_t dayofweek = 0; + DateTime aDateTime; + Time aTime; + switch (parm[0]->data()->resultType().colDataType) { case CalpontSystemCatalog::DATE: @@ -70,6 +73,17 @@ int64_t Func_dayname::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; + // Time 
adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_dayofweek.cpp b/utils/funcexp/func_dayofweek.cpp index a152ee15f..1ac549060 100644 --- a/utils/funcexp/func_dayofweek.cpp +++ b/utils/funcexp/func_dayofweek.cpp @@ -52,6 +52,9 @@ int64_t Func_dayofweek::getIntVal(rowgroup::Row& row, uint32_t day = 0; int64_t val = 0; + DateTime aDateTime; + Time aTime; + switch (parm[0]->data()->resultType().colDataType) { case CalpontSystemCatalog::DATE: @@ -68,6 +71,17 @@ int64_t Func_dayofweek::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_dayofyear.cpp b/utils/funcexp/func_dayofyear.cpp index 65c017202..0ec48f22f 100644 --- a/utils/funcexp/func_dayofyear.cpp +++ b/utils/funcexp/func_dayofyear.cpp @@ -52,6 +52,9 @@ int64_t Func_dayofyear::getIntVal(rowgroup::Row& row, uint32_t day = 0; int64_t val = 0; + DateTime aDateTime; + Time aTime; + switch (parm[0]->data()->resultType().colDataType) { case CalpontSystemCatalog::DATE: @@ -68,6 +71,17 @@ int64_t Func_dayofyear::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); 
break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_hex.cpp b/utils/funcexp/func_hex.cpp index 79ed1d57e..83106a9be 100644 --- a/utils/funcexp/func_hex.cpp +++ b/utils/funcexp/func_hex.cpp @@ -78,6 +78,7 @@ string Func_hex::getStrVal(rowgroup::Row& row, case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::DATETIME: case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::TIME: { const string& arg = parm[0]->data()->getStrVal(row, isNull); scoped_array hexPtr(new char[strlen(arg.c_str()) * 2 + 1]); diff --git a/utils/funcexp/func_last_day.cpp b/utils/funcexp/func_last_day.cpp index 38ba46ccb..757e19d77 100644 --- a/utils/funcexp/func_last_day.cpp +++ b/utils/funcexp/func_last_day.cpp @@ -53,6 +53,8 @@ int64_t Func_last_day::getIntVal(rowgroup::Row& row, uint32_t month = 0; uint32_t day = 0; int64_t val = 0; + DateTime aDateTime; + Time aTime; switch (parm[0]->data()->resultType().colDataType) { @@ -70,6 +72,17 @@ int64_t Func_last_day::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_month.cpp 
b/utils/funcexp/func_month.cpp index ba285f348..4269ae22f 100644 --- a/utils/funcexp/func_month.cpp +++ b/utils/funcexp/func_month.cpp @@ -48,6 +48,8 @@ int64_t Func_month::getIntVal(rowgroup::Row& row, CalpontSystemCatalog::ColType& op_ct) { int64_t val = 0; + DateTime aDateTime; + Time aTime; switch (parm[0]->data()->resultType().colDataType) { @@ -59,6 +61,15 @@ int64_t Func_month::getIntVal(rowgroup::Row& row, val = parm[0]->data()->getIntVal(row, isNull); return (unsigned)((val >> 44) & 0xf); + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + return (unsigned)((val >> 44) & 0xf); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_monthname.cpp b/utils/funcexp/func_monthname.cpp index dbe5aa513..8a22b4b67 100644 --- a/utils/funcexp/func_monthname.cpp +++ b/utils/funcexp/func_monthname.cpp @@ -75,6 +75,8 @@ int64_t Func_monthname::getIntVal(rowgroup::Row& row, CalpontSystemCatalog::ColType& op_ct) { int64_t val = 0; + DateTime aDateTime; + Time aTime; switch (parm[0]->data()->resultType().colDataType) { @@ -86,6 +88,16 @@ int64_t Func_monthname::getIntVal(rowgroup::Row& row, val = parm[0]->data()->getIntVal(row, isNull); return (unsigned)((val >> 44) & 0xf); + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + return (unsigned)((val >> 44) & 0xf); + break; + + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_quarter.cpp b/utils/funcexp/func_quarter.cpp index 1603ef31f..c819cbdbe 100644 --- a/utils/funcexp/func_quarter.cpp +++ 
b/utils/funcexp/func_quarter.cpp @@ -50,6 +50,8 @@ int64_t Func_quarter::getIntVal(rowgroup::Row& row, { // try to cast to date/datetime int64_t val = 0, month = 0; + DateTime aDateTime; + Time aTime; switch (parm[0]->data()->resultType().colDataType) { @@ -63,6 +65,15 @@ int64_t Func_quarter::getIntVal(rowgroup::Row& row, month = (val >> 44) & 0xf; break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + month = (uint32_t)((val >> 44) & 0xf); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_to_days.cpp b/utils/funcexp/func_to_days.cpp index 773ec4a8e..5c58b1b11 100644 --- a/utils/funcexp/func_to_days.cpp +++ b/utils/funcexp/func_to_days.cpp @@ -59,6 +59,9 @@ int64_t Func_to_days::getIntVal(rowgroup::Row& row, month = 0, day = 0; + DateTime aDateTime; + Time aTime; + switch (type) { case execplan::CalpontSystemCatalog::DATE: @@ -82,6 +85,21 @@ int64_t Func_to_days::getIntVal(rowgroup::Row& row, break; } + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + { + int64_t val; + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + return helpers::calc_mysql_daynr(year, month, day); + break; + } + case execplan::CalpontSystemCatalog::VARCHAR: // including CHAR' case execplan::CalpontSystemCatalog::CHAR: case execplan::CalpontSystemCatalog::TEXT: diff --git a/utils/funcexp/func_week.cpp b/utils/funcexp/func_week.cpp index 65145052f..9cb869c1e 100644 --- a/utils/funcexp/func_week.cpp +++ b/utils/funcexp/func_week.cpp @@ -53,6 +53,8 @@ int64_t 
Func_week::getIntVal(rowgroup::Row& row, int64_t val = 0; int16_t mode = 0; + DateTime aDateTime; + Time aTime; if (parm.size() > 1) // mode value mode = parm[1]->data()->getIntVal(row, isNull); @@ -73,6 +75,17 @@ int64_t Func_week::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_weekday.cpp b/utils/funcexp/func_weekday.cpp index 6022a860f..67f535f1f 100644 --- a/utils/funcexp/func_weekday.cpp +++ b/utils/funcexp/func_weekday.cpp @@ -52,6 +52,8 @@ int64_t Func_weekday::getIntVal(rowgroup::Row& row, uint32_t month = 0; uint32_t day = 0; int64_t val = 0; + DateTime aDateTime; + Time aTime; switch (parm[0]->data()->resultType().colDataType) { @@ -69,6 +71,17 @@ int64_t Func_weekday::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_year.cpp b/utils/funcexp/func_year.cpp index 119881499..8b3f79fa0 100644 --- a/utils/funcexp/func_year.cpp +++ b/utils/funcexp/func_year.cpp @@ -48,6 +48,8 @@ int64_t Func_year::getIntVal(rowgroup::Row& row, 
CalpontSystemCatalog::ColType& op_ct) { int64_t val = 0; + DateTime aDateTime; + Time aTime; switch (parm[0]->data()->resultType().colDataType) { @@ -59,6 +61,15 @@ int64_t Func_year::getIntVal(rowgroup::Row& row, val = parm[0]->data()->getIntVal(row, isNull); return (unsigned)((val >> 48) & 0xffff); + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + return (unsigned)((val >> 48) & 0xffff); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/func_yearweek.cpp b/utils/funcexp/func_yearweek.cpp index 84b8ca0a4..749491e11 100644 --- a/utils/funcexp/func_yearweek.cpp +++ b/utils/funcexp/func_yearweek.cpp @@ -54,6 +54,8 @@ int64_t Func_yearweek::getIntVal(rowgroup::Row& row, int64_t val = 0; int16_t mode = 0; // default to 2 + DateTime aDateTime; + Time aTime; if (parm.size() > 1) // mode value mode = parm[1]->data()->getIntVal(row, isNull); @@ -76,6 +78,17 @@ int64_t Func_yearweek::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; + // Time adds to now() and then gets value + case CalpontSystemCatalog::TIME: + aDateTime = static_cast(nowDatetime()); + aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + val = addTime(aDateTime, aTime); + year = (uint32_t)((val >> 48) & 0xffff); + month = (uint32_t)((val >> 44) & 0xf); + day = (uint32_t)((val >> 38) & 0x3f); + break; + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: diff --git a/utils/funcexp/functor.cpp b/utils/funcexp/functor.cpp index b74812ee9..f722dd557 100644 --- a/utils/funcexp/functor.cpp +++ b/utils/funcexp/functor.cpp @@ -28,6 +28,8 @@ #include #include #include +#include + using namespace std; #include "joblisttypes.h" @@ -145,6 +147,189 @@ int64_t 
Func::intToTime(int64_t i) return i; } +int64_t Func::nowDatetime() +{ + DateTime result; + boost::posix_time::ptime now = boost::posix_time::microsec_clock::local_time(); + result.year = now.date().year(); + result.month = now.date().month(); + result.day = now.date().day(); + result.hour = now.time_of_day().hours(); + result.minute = now.time_of_day().minutes(); + result.second = now.time_of_day().seconds(); + result.msecond = now.time_of_day().total_microseconds(); + + return (int64_t) * (reinterpret_cast(&result)); +} + +int64_t Func::addTime(DateTime& dt1, Time& dt2) +{ + DateTime dt; + dt.year = 0; + dt.month = 0; + dt.day = 0; + dt.hour = 0; + dt.minute = 0; + dt.second = 0; + dt.msecond = 0; + + int64_t month, day, hour, min, sec, msec, tmp; + msec = (signed)(dt1.msecond + dt2.msecond); + dt.msecond = tmp = msec % 1000000; + + if (tmp < 0) + { + dt.msecond = tmp + 1000000; + dt2.second--; + } + + sec = (signed)(dt1.second + dt2.second + msec / 1000000); + dt.second = tmp = sec % 60; + + if (tmp < 0) + { + dt.second = tmp + 60; + dt2.minute--; + } + + min = (signed)(dt1.minute + dt2.minute + sec / 60); + dt.minute = tmp = min % 60; + + if (tmp < 0) + { + dt.minute = tmp + 60; + dt2.hour--; + } + + hour = (signed)(dt1.hour + dt2.hour + min / 60); + + if ((hour < 0) || (hour > 23)) + { + dt2.day = hour / 24; + hour = hour % 24; + } + + if (hour < 0) + { + dt.hour = hour + 24; + dt2.day--; + } + else + { + dt.hour = hour; + } + + day = (signed)(dt1.day + dt2.day); + + + if (isLeapYear(dt1.year) && dt1.month == 2) + day--; + + month = dt1.month; + int addyear = 0; + + if (day < 0) + { + int monthSave = month; + + while (day <= 0) + { + month = (month == 1 ? 
12 : month - 1); + + for (; day <= 0 && month > 0; month--) + day += getDaysInMonth(month, dt1.year); + + month++; +// month=12; + } + + if ( month > monthSave ) + addyear--; + } + else + { + int monthSave = month; + + while (day > getDaysInMonth(month, dt1.year)) + { + for (; day > getDaysInMonth(month, dt1.year) && month <= 12; month++) + day -= getDaysInMonth(month, dt1.year); + + if (month > 12) + month = 1; + } + + if ( month < monthSave ) + addyear++; + } + + dt.day = day; + dt.month = month; + dt.year = dt1.year + addyear; + + return *(reinterpret_cast(&dt)); +} + +int64_t Func::addTime(Time& dt1, Time& dt2) +{ + Time dt; + dt.is_neg = false; + dt.hour = 0; + dt.minute = 0; + dt.second = 0; + dt.msecond = 0; + + int64_t min, sec, msec, tmp; + msec = (signed)(dt1.msecond + dt2.msecond); + dt.msecond = tmp = msec % 1000000; + + if (tmp < 0) + { + dt.msecond = tmp + 1000000; + dt2.second--; + } + + sec = (signed)(dt1.second + dt2.second + msec / 1000000); + dt.second = tmp = sec % 60; + + if (tmp < 0) + { + dt.second = tmp + 60; + dt2.minute--; + } + + min = (signed)(dt1.minute + dt2.minute + sec / 60); + dt.minute = tmp = min % 60; + + if (tmp < 0) + { + dt.minute = tmp + 60; + dt2.hour--; + } + + dt.hour = tmp = (signed)(dt1.hour + dt2.hour + min / 60); + + // Saturation + if (tmp > 838) + { + dt.hour = 838; + dt.minute = 59; + dt.second = 59; + dt.msecond = 999999; + } + else if (tmp < -838) + { + dt.is_neg = true; + dt.hour = -838; + dt.minute = 59; + dt.second = 59; + dt.msecond = 999999; + } + + return *(reinterpret_cast(&dt)); +} + + string Func::intToString(int64_t i) { return helpers::intToString(i); diff --git a/utils/funcexp/functor.h b/utils/funcexp/functor.h index 9edb9bf62..a16917453 100644 --- a/utils/funcexp/functor.h +++ b/utils/funcexp/functor.h @@ -34,6 +34,9 @@ #include "calpontsystemcatalog.h" +#include "dataconvert.h" +using namespace dataconvert; + namespace rowgroup { class Row; @@ -162,6 +165,10 @@ protected: virtual std::string 
intToString(int64_t); virtual std::string doubleToString(double); + virtual int64_t nowDatetime(); + virtual int64_t addTime(DateTime& dt1, Time& dt2); + virtual int64_t addTime(Time& dt1, Time& dt2); + std::string fFuncName; private: From 250d90a9bc695de73c485e79a27b7e7895ca5c02 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 14 Jun 2018 14:43:37 +0100 Subject: [PATCH 046/123] MCOL-1474 Catch errors in PriorityThreadPool PriorityThreadPool errors cause crashes in PrimProc. This patch catches the errors and causes the thread to end cleanly. --- utils/threadpool/prioritythreadpool.cpp | 151 ++++++++++++++++-------- 1 file changed, 100 insertions(+), 51 deletions(-) diff --git a/utils/threadpool/prioritythreadpool.cpp b/utils/threadpool/prioritythreadpool.cpp index 461069e18..a0fc6a347 100644 --- a/utils/threadpool/prioritythreadpool.cpp +++ b/utils/threadpool/prioritythreadpool.cpp @@ -111,64 +111,113 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() uint32_t rescheduleCount; uint32_t queueSize; - while (!_stop) { + try + { + while (!_stop) { - mutex::scoped_lock lk(mutex); + mutex::scoped_lock lk(mutex); - queue = pickAQueue(preferredQueue); - if (jobQueues[queue].empty()) { - newJob.wait(lk); - continue; - } + queue = pickAQueue(preferredQueue); + if (jobQueues[queue].empty()) { + newJob.wait(lk); + continue; + } - queueSize = jobQueues[queue].size(); - weight = 0; - // 3 conditions stop this thread from grabbing all jobs in the queue - // - // 1: The weight limit has been exceeded - // 2: The queue is empty - // 3: It has grabbed more than half of the jobs available & - // should leave some to the other threads + queueSize = jobQueues[queue].size(); + weight = 0; + // 3 conditions stop this thread from grabbing all jobs in the queue + // + // 1: The weight limit has been exceeded + // 2: The queue is empty + // 3: It has grabbed more than half of the jobs available & + // should leave some to the other threads - while 
((weight < weightPerRun) && (!jobQueues[queue].empty()) - && (runList.size() <= queueSize/2)) { - runList.push_back(jobQueues[queue].front()); - jobQueues[queue].pop_front(); - weight += runList.back().weight; - } - lk.unlock(); + while ((weight < weightPerRun) && (!jobQueues[queue].empty()) + && (runList.size() <= queueSize/2)) { + runList.push_back(jobQueues[queue].front()); + jobQueues[queue].pop_front(); + weight += runList.back().weight; + } + lk.unlock(); - reschedule.resize(runList.size()); - rescheduleCount = 0; - for (i = 0; i < runList.size() && !_stop; i++) { - try { - reschedule[i] = false; - reschedule[i] = (*(runList[i].functor))(); - if (reschedule[i]) - rescheduleCount++; - } - catch (std::exception &e) { - cerr << e.what() << endl; - } - } + reschedule.resize(runList.size()); + rescheduleCount = 0; + for (i = 0; i < runList.size() && !_stop; i++) { + try { + reschedule[i] = false; + reschedule[i] = (*(runList[i].functor))(); + if (reschedule[i]) + rescheduleCount++; + } + catch (std::exception &e) { + cerr << e.what() << endl; + } + } - // no real work was done, prevent intensive busy waiting - if (rescheduleCount == runList.size()) - usleep(1000); + // no real work was done, prevent intensive busy waiting + if (rescheduleCount == runList.size()) + usleep(1000); - if (rescheduleCount > 0) { - lk.lock(); - for (i = 0; i < runList.size(); i++) - if (reschedule[i]) - addJob(runList[i], false); - if (rescheduleCount > 1) - newJob.notify_all(); - else - newJob.notify_one(); - lk.unlock(); - } - runList.clear(); - } + if (rescheduleCount > 0) { + lk.lock(); + for (i = 0; i < runList.size(); i++) + if (reschedule[i]) + addJob(runList[i], false); + if (rescheduleCount > 1) + newJob.notify_all(); + else + newJob.notify_one(); + lk.unlock(); + } + runList.clear(); + } + } + catch (std::exception &ex) + { + // Log the exception and exit this thread + try + { +#ifndef NOLOGGING + logging::Message::Args args; + logging::Message message(5); + 
args.add("threadFcn: Caught exception: "); + args.add(ex.what()); + + message.format( args ); + + logging::LoggingID lid(22); + logging::MessageLog ml(lid); + + ml.logErrorMessage( message ); +#endif + } + catch (...) + { + } + } + catch (...) + { + + // Log the exception and exit this thread + try + { +#ifndef NOLOGGING + logging::Message::Args args; + logging::Message message(6); + args.add("threadFcn: Caught unknown exception!"); + + message.format( args ); + + logging::LoggingID lid(22); + logging::MessageLog ml(lid); + + ml.logErrorMessage( message ); +#endif + } + catch (...) + { + } + } } void PriorityThreadPool::stop() From 40405c792af7e66129ae5c2e6f14bfc88b316d20 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 14 Jun 2018 16:28:06 +0100 Subject: [PATCH 047/123] MCOL-1474 Add error handling to PTP PriorityThreadPool didn't have very good error handling. If something failed it would just ignore whatever was being processed. This could lead to a query continuing without retrieving all of the required data. This patch adds error handling, sending a message back to the client and a log message. It also destroys and recreates the pool thread. 
--- primitives/primproc/primitiveserver.cpp | 46 +++++++++++++++++++++ utils/threadpool/prioritythreadpool.cpp | 54 +++++++++++++++++++++---- utils/threadpool/prioritythreadpool.h | 6 +++ 3 files changed, 98 insertions(+), 8 deletions(-) diff --git a/primitives/primproc/primitiveserver.cpp b/primitives/primproc/primitiveserver.cpp index 7989a17d4..f56f1ef6c 100755 --- a/primitives/primproc/primitiveserver.cpp +++ b/primitives/primproc/primitiveserver.cpp @@ -1818,12 +1818,22 @@ struct ReadThread switch(ismHdr->Command) { case DICT_CREATE_EQUALITY_FILTER: { PriorityThreadPool::Job job; + const uint8_t *buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t *) &buf[pos+6]); + job.uniqueID = *((uint32_t *) &buf[pos+10]); + job.sock = outIos; job.functor = boost::shared_ptr(new CreateEqualityFilter(bs)); OOBPool->addJob(job); break; } case DICT_DESTROY_EQUALITY_FILTER: { PriorityThreadPool::Job job; + const uint8_t *buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t *) &buf[pos+6]); + job.uniqueID = *((uint32_t *) &buf[pos+10]); + job.sock = outIos; job.functor = boost::shared_ptr(new DestroyEqualityFilter(bs)); OOBPool->addJob(job); break; @@ -1851,6 +1861,11 @@ struct ReadThread job.id = hdr->Hdr.UniqueID; job.weight = LOGICAL_BLOCK_RIDS; job.priority = hdr->Hdr.Priority; + const uint8_t *buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t *) &buf[pos+6]); + job.uniqueID = *((uint32_t *) &buf[pos+10]); + job.sock = outIos; if (hdr->flags & IS_SYSCAT) { //boost::thread t(DictScanJob(outIos, bs, writeLock)); // using already-existing threads may cut latency @@ -1889,6 +1904,12 @@ struct ReadThread job.id = bpps->getID(); job.weight = ismHdr->Size; job.priority = bpps->priority(); + const uint8_t *buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t *) &buf[pos+6]); + job.uniqueID = *((uint32_t *) &buf[pos+10]); + job.sock 
= outIos; + if (bpps->isSysCat()) { //boost::thread t(*bpps); // using already-existing threads may cut latency @@ -1904,6 +1925,11 @@ struct ReadThread case BATCH_PRIMITIVE_CREATE: { PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::Create(fBPPHandler, bs)); + const uint8_t *buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t *) &buf[pos+6]); + job.uniqueID = *((uint32_t *) &buf[pos+10]); + job.sock = outIos; OOBPool->addJob(job); //fBPPHandler->createBPP(*bs); break; @@ -1912,6 +1938,11 @@ struct ReadThread PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::AddJoiner(fBPPHandler, bs)); job.id = fBPPHandler->getUniqueID(bs, ismHdr->Command); + const uint8_t *buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t *) &buf[pos+6]); + job.uniqueID = *((uint32_t *) &buf[pos+10]); + job.sock = outIos; OOBPool->addJob(job); //fBPPHandler->addJoinerToBPP(*bs); break; @@ -1923,6 +1954,11 @@ struct ReadThread PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::LastJoiner(fBPPHandler, bs)); job.id = fBPPHandler->getUniqueID(bs, ismHdr->Command); + const uint8_t *buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t *) &buf[pos+6]); + job.uniqueID = *((uint32_t *) &buf[pos+10]); + job.sock = outIos; OOBPool->addJob(job); break; } @@ -1932,6 +1968,11 @@ struct ReadThread PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::Destroy(fBPPHandler, bs)); job.id = fBPPHandler->getUniqueID(bs, ismHdr->Command); + const uint8_t *buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t *) &buf[pos+6]); + job.uniqueID = *((uint32_t *) &buf[pos+10]); + job.sock = outIos; OOBPool->addJob(job); //fBPPHandler->destroyBPP(*bs); break; @@ -1946,6 +1987,11 @@ struct ReadThread PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new 
BPPHandler::Abort(fBPPHandler, bs)); job.id = fBPPHandler->getUniqueID(bs, ismHdr->Command); + const uint8_t *buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t *) &buf[pos+6]); + job.uniqueID = *((uint32_t *) &buf[pos+10]); + job.sock = outIos; OOBPool->addJob(job); break; } diff --git a/utils/threadpool/prioritythreadpool.cpp b/utils/threadpool/prioritythreadpool.cpp index a0fc6a347..4d19df91e 100644 --- a/utils/threadpool/prioritythreadpool.cpp +++ b/utils/threadpool/prioritythreadpool.cpp @@ -33,6 +33,8 @@ using namespace logging; #include "prioritythreadpool.h" using namespace boost; +#include "dbcon/joblist/primitivemsg.h" + namespace threadpool { @@ -48,9 +50,9 @@ PriorityThreadPool::PriorityThreadPool(uint targetWeightPerRun, uint highThreads threads.create_thread(ThreadHelper(this, LOW)); cout << "started " << highThreads << " high, " << midThreads << " med, " << lowThreads << " low.\n"; - threadCounts[HIGH] = highThreads; - threadCounts[MEDIUM] = midThreads; - threadCounts[LOW] = lowThreads; + defaultThreadCounts[HIGH] = threadCounts[HIGH] = highThreads; + defaultThreadCounts[MEDIUM] = threadCounts[MEDIUM] = midThreads; + defaultThreadCounts[LOW] = threadCounts[LOW] = lowThreads; } PriorityThreadPool::~PriorityThreadPool() @@ -65,6 +67,23 @@ void PriorityThreadPool::addJob(const Job &job, bool useLock) if (useLock) lk.lock(); + // Create any missing threads + if (defaultThreadCounts[HIGH] != threadCounts[HIGH]) + { + threads.create_thread(ThreadHelper(this, HIGH)); + threadCounts[HIGH]++; + } + if (defaultThreadCounts[MEDIUM] != threadCounts[MEDIUM]) + { + threads.create_thread(ThreadHelper(this, MEDIUM)); + threadCounts[MEDIUM]++; + } + if (defaultThreadCounts[LOW] != threadCounts[LOW]) + { + threads.create_thread(ThreadHelper(this, LOW)); + threadCounts[LOW]++; + } + if (job.priority > 66) jobQueues[HIGH].push_back(job); else if (job.priority > 33) @@ -110,6 +129,7 @@ void PriorityThreadPool::threadFcn(const 
Priority preferredQueue) throw() vector reschedule; uint32_t rescheduleCount; uint32_t queueSize; + bool running = false; try { @@ -143,15 +163,12 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() reschedule.resize(runList.size()); rescheduleCount = 0; for (i = 0; i < runList.size() && !_stop; i++) { - try { reschedule[i] = false; + running = true; reschedule[i] = (*(runList[i].functor))(); + running = false; if (reschedule[i]) rescheduleCount++; - } - catch (std::exception &e) { - cerr << e.what() << endl; - } } // no real work was done, prevent intensive busy waiting @@ -177,6 +194,7 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() // Log the exception and exit this thread try { + threadCounts[queue]--; #ifndef NOLOGGING logging::Message::Args args; logging::Message message(5); @@ -190,6 +208,8 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() ml.logErrorMessage( message ); #endif + if (running) + sendErrorMsg(runList[i].uniqueID, runList[i].stepID, runList[i].sock); } catch (...) { @@ -201,6 +221,7 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() // Log the exception and exit this thread try { + threadCounts[queue]--; #ifndef NOLOGGING logging::Message::Args args; logging::Message message(6); @@ -213,6 +234,8 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() ml.logErrorMessage( message ); #endif + if (running) + sendErrorMsg(runList[i].uniqueID, runList[i].stepID, runList[i].sock); } catch (...) 
{ @@ -220,6 +243,21 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() } } +void PriorityThreadPool::sendErrorMsg(uint32_t id, uint32_t step, primitiveprocessor::SP_UM_IOSOCK sock) +{ + ISMPacketHeader ism; + PrimitiveHeader ph = {0}; + + ism.Status = logging::primitiveServerErr; + ph.UniqueID = id; + ph.StepID = step; + ByteStream msg(sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader)); + msg.append((uint8_t *) &ism, sizeof(ism)); + msg.append((uint8_t *) &ph, sizeof(ph)); + + sock->write(msg); +} + void PriorityThreadPool::stop() { _stop = true; diff --git a/utils/threadpool/prioritythreadpool.h b/utils/threadpool/prioritythreadpool.h index 516c0df2f..649913d95 100644 --- a/utils/threadpool/prioritythreadpool.h +++ b/utils/threadpool/prioritythreadpool.h @@ -36,6 +36,7 @@ #include #include #include "../winport/winport.h" +#include "primitives/primproc/umsocketselector.h" namespace threadpool { @@ -60,6 +61,9 @@ public: uint32_t weight; uint32_t priority; uint32_t id; + uint32_t uniqueID; + uint32_t stepID; + primitiveprocessor::SP_UM_IOSOCK sock; }; enum Priority { @@ -105,9 +109,11 @@ private: Priority pickAQueue(Priority preference); void threadFcn(const Priority preferredQueue) throw(); + void sendErrorMsg(uint32_t id, uint32_t step, primitiveprocessor::SP_UM_IOSOCK sock); std::list jobQueues[3]; // higher indexes = higher priority uint32_t threadCounts[3]; + uint32_t defaultThreadCounts[3]; boost::mutex mutex; boost::condition newJob; boost::thread_group threads; From d6cb205dfc6190b22dcd41f43af74839a77def25 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 14 Jun 2018 18:37:52 +0100 Subject: [PATCH 048/123] MCOL-1475 Improve cross engine error handling Now shows MariaDB error code and message where possible. 
--- dbcon/joblist/crossenginestep.cpp | 15 ++++++++++----- dbcon/joblist/crossenginestep.h | 2 ++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/dbcon/joblist/crossenginestep.cpp b/dbcon/joblist/crossenginestep.cpp index 7f9f3199f..b91e83150 100644 --- a/dbcon/joblist/crossenginestep.cpp +++ b/dbcon/joblist/crossenginestep.cpp @@ -773,11 +773,16 @@ string CrossEngineStep::makeQuery() void CrossEngineStep::handleMySqlError(const char* errStr, unsigned int errCode) { ostringstream oss; - oss << errStr << "(" << errCode << ")"; - if (errCode == (unsigned int) -1) - oss << "(null pointer)"; - else - oss << "(" << errCode << ")"; + if (mysql->getErrno()) + { + oss << errStr << " (" << mysql->getErrno() << ")"; + oss << " (" << mysql->getErrorMsg() << ")"; + } + else + { + oss << errStr << " (" << errCode << ")"; + oss << " (unknown)"; + } throw IDBExcept(oss.str(), ERR_CROSS_ENGINE_CONNECT); diff --git a/dbcon/joblist/crossenginestep.h b/dbcon/joblist/crossenginestep.h index 30b715c25..ad731b384 100644 --- a/dbcon/joblist/crossenginestep.h +++ b/dbcon/joblist/crossenginestep.h @@ -70,6 +70,8 @@ public: long getFieldLength(int field) { return fieldLengths[field]; } MYSQL_FIELD* getField(int field) { return &fFields[field]; } const std::string& getError() { return fErrStr; } + unsigned int getErrno() { return mysql_errno(fCon); } + const char* getErrorMsg() { return mysql_error(fCon); } private: MYSQL* fCon; From 337bd9ba8c19630df02f2a2477180c4edefd5068 Mon Sep 17 00:00:00 2001 From: david hill Date: Mon, 18 Jun 2018 15:16:47 -0500 Subject: [PATCH 049/123] Update README --- README | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README b/README index 21bce2dd9..90421faa8 100644 --- a/README +++ b/README @@ -1,9 +1,9 @@ -This is MariaDB ColumnStore 1.1.4 GA -MariaDB ColumnStore 1.1.4 GA is the development version of MariaDB ColumnStore. 
+This is MariaDB ColumnStore 1.1 GA +MariaDB ColumnStore 1.1 GA is the development version of MariaDB ColumnStore. It is built by porting InfiniDB 4.6.7 on MariaDB 10.2 and adding entirely new features not found anywhere else. -MariaDB ColumnStore 1.1.4 is a GA release. This is the first MariaDB +MariaDB ColumnStore 1.1 is a GA release. This is the first MariaDB ColumnStore release, not all features planned for the MariaDB ColumnStore 1.0 series are included in this release. From 9f6df3b0ce87a5579133156305282a9c81cc801e Mon Sep 17 00:00:00 2001 From: david hill Date: Mon, 18 Jun 2018 15:17:58 -0500 Subject: [PATCH 050/123] Update VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index aef96eefc..8a397bd3a 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ COLUMNSTORE_VERSION_MAJOR=1 COLUMNSTORE_VERSION_MINOR=1 -COLUMNSTORE_VERSION_PATCH=5 +COLUMNSTORE_VERSION_PATCH=6 COLUMNSTORE_VERSION_RELEASE=1 From 1f8083e59b8370d027a11d24b3e664e2f6d2eaf4 Mon Sep 17 00:00:00 2001 From: david hill Date: Mon, 18 Jun 2018 15:18:39 -0500 Subject: [PATCH 051/123] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index bd3717aab..2a5a19e09 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -#MariaDB ColumnStore Storage/Execution engine 1.1.4 GA -MariaDB ColumnStore 1.1.4 GA is the development version of MariaDB ColumnStore. +#MariaDB ColumnStore Storage/Execution engine 1.1 GA +MariaDB ColumnStore 1.1 GA is the development version of MariaDB ColumnStore. It is built by porting InfiniDB 4.6.7 on MariaDB 10.2.9 and adding entirely new features not found anywhere else. -#MariaDB ColumnStore 1.1.4 is an GA release. +#MariaDB ColumnStore 1.1 is an GA release. #Building This repository is not meant to be built independently outside of the server. 
This repository is integrated into http://mariadb-corporation/mariadb-columnstore-server (ie, the *server*) as a git submodule. As such, you can find complete build instructions on *the server* page. From d3d322ed7bb8a2c7c7bcfb484ae96469d18f9442 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Tue, 19 Jun 2018 15:12:44 +0100 Subject: [PATCH 052/123] MCOL-1484 I_S condition pushdowns Add condition pushdowns to the information_schema tables to give a performance improvement when a relevant WHERE condition is provided. In addition there is a new table_usage() stored procedure designed to use the pushdowns. --- dbcon/mysql/columnstore_info.sql | 65 +++--- dbcon/mysql/is_columnstore_columns.cpp | 71 ++++++- dbcon/mysql/is_columnstore_extents.cpp | 283 +++++++++++++++---------- dbcon/mysql/is_columnstore_files.cpp | 231 +++++++++++++------- dbcon/mysql/is_columnstore_tables.cpp | 69 ++++++ 5 files changed, 505 insertions(+), 214 deletions(-) diff --git a/dbcon/mysql/columnstore_info.sql b/dbcon/mysql/columnstore_info.sql index 563052a11..d0433a0d9 100644 --- a/dbcon/mysql/columnstore_info.sql +++ b/dbcon/mysql/columnstore_info.sql @@ -37,43 +37,56 @@ DROP PROCEDURE IF EXISTS `table_usage` // CREATE PROCEDURE table_usage (IN t_schema char(64), IN t_name char(64)) `table_usage`: BEGIN + DECLARE done INTEGER DEFAULT 0; + DECLARE dbname VARCHAR(64); + DECLARE tbname VARCHAR(64); + DECLARE object_ids TEXT; + DECLARE dictionary_object_ids TEXT; DECLARE `locker` TINYINT UNSIGNED DEFAULT IS_USED_LOCK('table_usage'); - + DECLARE columns_list CURSOR FOR SELECT TABLE_SCHEMA, TABLE_NAME, GROUP_CONCAT(object_id) OBJECT_IDS, GROUP_CONCAT(dictionary_object_id) DICT_OBJECT_IDS FROM INFORMATION_SCHEMA.COLUMNSTORE_COLUMNS WHERE table_name = t_name and table_schema = t_schema GROUP BY table_schema, table_name; + DECLARE columns_list_sc CURSOR FOR SELECT TABLE_SCHEMA, TABLE_NAME, GROUP_CONCAT(object_id) OBJECT_IDS, GROUP_CONCAT(dictionary_object_id) DICT_OBJECT_IDS FROM 
INFORMATION_SCHEMA.COLUMNSTORE_COLUMNS WHERE table_schema = t_schema GROUP BY table_schema, table_name; + DECLARE columns_list_all CURSOR FOR SELECT TABLE_SCHEMA, TABLE_NAME, GROUP_CONCAT(object_id) OBJECT_IDS, GROUP_CONCAT(dictionary_object_id) DICT_OBJECT_IDS FROM INFORMATION_SCHEMA.COLUMNSTORE_COLUMNS GROUP BY table_schema, table_name; + DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1; IF `locker` IS NOT NULL THEN SIGNAL SQLSTATE '45000' SET MESSAGE_TEXT = 'Error acquiring table_usage lock'; LEAVE `table_usage`; END IF; DO GET_LOCK('table_usage', 0); - DROP TABLE IF EXISTS columnstore_info.columnstore_columns; DROP TABLE IF EXISTS columnstore_info.columnstore_files; - CREATE TABLE columnstore_info.columnstore_columns engine=myisam as (select * from information_schema.columnstore_columns); - ALTER TABLE columnstore_info.columnstore_columns ADD INDEX `object_id` (`object_id`); - ALTER TABLE columnstore_info.columnstore_columns ADD INDEX `dictionary_object_id` (`dictionary_object_id`); - CREATE TABLE columnstore_info.columnstore_files engine=myisam as (select * from information_schema.columnstore_files); - ALTER TABLE columnstore_info.columnstore_files ADD INDEX `object_id` (`object_id`); + CREATE TEMPORARY TABLE columnstore_info.columnstore_files (TABLE_SCHEMA VARCHAR(64), TABLE_NAME VARCHAR(64), DATA BIGINT, DICT BIGINT); + IF t_name IS NOT NULL THEN -SELECT TABLE_SCHEMA, TABLE_NAME, columnstore_info.format_filesize(data) as DATA_DISK_USAGE, columnstore_info.format_filesize(dict) as DICT_DISK_USAGE, columnstore_info.format_filesize(data + COALESCE(dict, 0)) as TOTAL_USAGE FROM ( -SELECT TABLE_SCHEMA, TABLE_NAME, (SELECT sum(cf.file_size) as data FROM columnstore_info.columnstore_columns cc JOIN columnstore_info.columnstore_files cf ON cc.object_id = cf.object_id WHERE table_name = ics.table_name and table_schema = ics.table_schema) as data, (SELECT sum(cf.file_size) as dict FROM columnstore_info.columnstore_columns cc JOIN columnstore_info.columnstore_files 
cf ON cc.dictionary_object_id = cf.object_id WHERE table_name = ics.table_name and table_schema = ics.table_schema GROUP BY table_schema, table_name) as dict -FROM -columnstore_info.columnstore_columns ics where table_name = t_name and (table_schema = t_schema or t_schema IS NULL) -group by table_schema, table_name -) q; + OPEN columns_list; ELSEIF t_schema IS NOT NULL THEN -SELECT TABLE_SCHEMA, TABLE_NAME, columnstore_info.format_filesize(data) as DATA_DISK_USAGE, columnstore_info.format_filesize(dict) as DICT_DISK_USAGE, columnstore_info.format_filesize(data + COALESCE(dict, 0)) as TOTAL_USAGE FROM ( -SELECT TABLE_SCHEMA, TABLE_NAME, (SELECT sum(cf.file_size) as data FROM columnstore_info.columnstore_columns cc JOIN columnstore_info.columnstore_files cf ON cc.object_id = cf.object_id WHERE table_name = ics.table_name and table_schema = ics.table_schema) as data, (SELECT sum(cf.file_size) as dict FROM columnstore_info.columnstore_columns cc JOIN columnstore_info.columnstore_files cf ON cc.dictionary_object_id = cf.object_id WHERE table_name = ics.table_name and table_schema = ics.table_schema GROUP BY table_schema, table_name) as dict -FROM -columnstore_info.columnstore_columns ics where table_schema = t_schema -group by table_schema, table_name -) q; + OPEN columns_list_sc; ELSE -SELECT TABLE_SCHEMA, TABLE_NAME, columnstore_info.format_filesize(data) as DATA_DISK_USAGE, columnstore_info.format_filesize(dict) as DICT_DISK_USAGE, columnstore_info.format_filesize(data + COALESCE(dict, 0)) as TOTAL_USAGE FROM ( -SELECT TABLE_SCHEMA, TABLE_NAME, (SELECT sum(cf.file_size) as data FROM columnstore_info.columnstore_columns cc JOIN columnstore_info.columnstore_files cf ON cc.object_id = cf.object_id WHERE table_name = ics.table_name and table_schema = ics.table_schema) as data, (SELECT sum(cf.file_size) as dict FROM columnstore_info.columnstore_columns cc JOIN columnstore_info.columnstore_files cf ON cc.dictionary_object_id = cf.object_id WHERE table_name = ics.table_name 
and table_schema = ics.table_schema GROUP BY table_schema, table_name) as dict -FROM -columnstore_info.columnstore_columns ics -group by table_schema, table_name -) q; + OPEN columns_list_all; END IF; - DROP TABLE IF EXISTS columnstore_info.columnstore_columns; + + files_table: LOOP + IF t_name IS NOT NULL THEN + FETCH columns_list INTO dbname, tbname, object_ids, dictionary_object_ids; + ELSEIF t_schema IS NOT NULL THEN + FETCH columns_list_sc INTO dbname, tbname, object_ids, dictionary_object_ids; + ELSE + FETCH columns_list_all INTO dbname, tbname, object_ids, dictionary_object_ids; + END IF; + IF done = 1 THEN LEAVE files_table; + END IF; + INSERT INTO columnstore_info.columnstore_files (SELECT dbname, tbname, sum(file_size), 0 FROM information_schema.columnstore_files WHERE find_in_set(object_id, object_ids)); + IF dictionary_object_ids IS NOT NULL THEN + UPDATE columnstore_info.columnstore_files SET DICT = (SELECT sum(file_size) FROM information_schema.columnstore_files WHERE find_in_set(object_id, dictionary_object_ids)) WHERE TABLE_SCHEMA = dbname AND TABLE_NAME = tbname; + END IF; + END LOOP; + IF t_name IS NOT NULL THEN + CLOSE columns_list; + ELSEIF t_schema IS NOT NULL THEN + CLOSE columns_list_sc; + ELSE + CLOSE columns_list_all; + END IF; + SELECT TABLE_SCHEMA, TABLE_NAME, columnstore_info.format_filesize(DATA) as DATA_DISK_USAGE, columnstore_info.format_filesize(DICT) as DICT_DATA_USAGE, columnstore_info.format_filesize(DATA + COALESCE(DICT, 0)) as TOTAL_USAGE FROM columnstore_info.columnstore_files; + DROP TABLE IF EXISTS columnstore_info.columnstore_files; DO RELEASE_LOCK('table_usage'); END // diff --git a/dbcon/mysql/is_columnstore_columns.cpp b/dbcon/mysql/is_columnstore_columns.cpp index b446eed4e..e2f338782 100644 --- a/dbcon/mysql/is_columnstore_columns.cpp +++ b/dbcon/mysql/is_columnstore_columns.cpp @@ -56,10 +56,59 @@ ST_FIELD_INFO is_columnstore_columns_fields[] = }; +static void get_cond_item(Item_func *item, String **table, String **db) 
+{ + char tmp_char[MAX_FIELD_WIDTH]; + Item_field *item_field = (Item_field*) item->arguments()[0]->real_item(); + if (strcasecmp(item_field->field_name, "table_name") == 0) + { + String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); + *table = item->arguments()[1]->val_str(&str_buf); + return; + } + else if (strcasecmp(item_field->field_name, "table_schema") == 0) + { + String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); + *db = item->arguments()[1]->val_str(&str_buf); + return; + } +} + +static void get_cond_items(COND *cond, String **table, String **db) +{ + if (cond->type() == Item::FUNC_ITEM) + { + Item_func* fitem = (Item_func*) cond; + if (fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[1]->const_item()) + { + get_cond_item(fitem, table, db); + } + } + else if ((cond->type() == Item::COND_ITEM) && (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)) + { + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item= li++)) + { + if (item->type() == Item::FUNC_ITEM) + { + get_cond_item((Item_func*)item, table, db); + } + else + { + get_cond_items(item, table, db); + } + } + } +} + static int is_columnstore_columns_fill(THD *thd, TABLE_LIST *tables, COND *cond) { CHARSET_INFO *cs = system_charset_info; TABLE *table = tables->table; + String *table_name = NULL; + String *db_name = NULL; boost::shared_ptr systemCatalogPtr = execplan::CalpontSystemCatalog::makeCalpontSystemCatalog(execplan::CalpontSystemCatalog::idb_tid2sid(thd->thread_id)); @@ -69,9 +118,29 @@ static int is_columnstore_columns_fill(THD *thd, TABLE_LIST *tables, COND *cond) systemCatalogPtr->identity(execplan::CalpontSystemCatalog::FE); + if (cond) + { + get_cond_items(cond, &table_name, &db_name); + } + for (std::vector >::const_iterator it = catalog_tables.begin(); it != catalog_tables.end(); ++it) { + if (db_name) + { + if ((*it).second.schema.compare(db_name->ptr()) != 0) + { + continue; + } + } 
+ if (table_name) + { + if ((*it).second.table.compare(table_name->ptr()) != 0) + { + continue; + } + } + execplan::CalpontSystemCatalog::RIDList column_rid_list; // Note a table may get dropped as you iterate over the list of tables. // So simply ignore the dropped table. @@ -168,8 +237,6 @@ static int is_columnstore_columns_fill(THD *thd, TABLE_LIST *tables, COND *cond) } } - - return 0; } diff --git a/dbcon/mysql/is_columnstore_extents.cpp b/dbcon/mysql/is_columnstore_extents.cpp index 4ee4cbce6..27eceeafc 100644 --- a/dbcon/mysql/is_columnstore_extents.cpp +++ b/dbcon/mysql/is_columnstore_extents.cpp @@ -52,131 +52,200 @@ ST_FIELD_INFO is_columnstore_extents_fields[] = {0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0} }; -static int is_columnstore_extents_fill(THD *thd, TABLE_LIST *tables, COND *cond) +static int generate_result(BRM::OID_t oid, BRM::DBRM *emp, TABLE *table, THD *thd) { CHARSET_INFO *cs = system_charset_info; - TABLE *table = tables->table; std::vector entries; std::vector::iterator iter; std::vector::iterator end; + emp->getExtents(oid, entries, false, false, true); + if (entries.size() == 0) + return 0; + + iter = entries.begin(); + end = entries.end(); + while (iter != end) + { + table->field[0]->store(oid); + if (iter->colWid > 0) + { + table->field[1]->store("Column", strlen("Column"), cs); + if (iter->partition.cprange.lo_val == std::numeric_limits::max() || + iter->partition.cprange.lo_val <= (std::numeric_limits::min() + 2)) + { + table->field[4]->set_null(); + } + else + { + table->field[4]->set_notnull(); + table->field[4]->store(iter->partition.cprange.lo_val); + } + if (iter->partition.cprange.hi_val == std::numeric_limits::max() || + iter->partition.cprange.hi_val <= (std::numeric_limits::min() + 2)) + { + table->field[5]->set_null(); + } + else + { + table->field[5]->set_notnull(); + table->field[5]->store(iter->partition.cprange.hi_val); + } + table->field[6]->store(iter->colWid); + + } + else + { + table->field[1]->store("Dictionary", 
strlen("Dictionary"), cs); + table->field[4]->set_null(); + table->field[5]->set_null(); + table->field[6]->store(8192); + } + table->field[2]->store(iter->range.start); + table->field[3]->store(iter->range.start + (iter->range.size * 1024) - 1); + + table->field[7]->store(iter->dbRoot); + table->field[8]->store(iter->partitionNum); + table->field[9]->store(iter->segmentNum); + table->field[10]->store(iter->blockOffset); + table->field[11]->store(iter->range.size * 1024); + table->field[12]->store(iter->HWM); + + switch (iter->partition.cprange.isValid) + { + case 0: + table->field[13]->store("Invalid", strlen("Invalid"), cs); + break; + case 1: + table->field[13]->store("Updating", strlen("Updating"), cs); + break; + case 2: + table->field[13]->store("Valid", strlen("Valid"), cs); + break; + default: + table->field[13]->store("Unknown", strlen("Unknown"), cs); + break; + } + switch (iter->status) + { + case BRM::EXTENTAVAILABLE: + table->field[14]->store("Available", strlen("Available"), cs); + break; + case BRM::EXTENTUNAVAILABLE: + table->field[14]->store("Unavailable", strlen("Unavailable"), cs); + break; + case BRM::EXTENTOUTOFSERVICE: + table->field[14]->store("Out of service", strlen("Out of service"), cs); + break; + default: + table->field[14]->store("Unknown", strlen("Unknown"), cs); + } + // MCOL-1016: on multiple segments HWM is set to 0 on the lower + // segments, we don't want these to show as 8KB. The down side is + // if the column has less than 1 block it will show as 0 bytes. + // We have no lookahead without it getting messy so this is the + // best compromise. 
+ if (iter->HWM == 0) + { + table->field[15]->store(0); + } + else + { + table->field[15]->store((iter->HWM + 1) * 8192); + } + + if (schema_table_store_record(thd, table)) + { + delete emp; + return 1; + } + + iter++; + + } + return 0; +} + +static int is_columnstore_extents_fill(THD *thd, TABLE_LIST *tables, COND *cond) +{ + BRM::OID_t cond_oid = 0; + TABLE *table = tables->table; + BRM::DBRM *emp = new BRM::DBRM(); if (!emp || !emp->isDBRMReady()) { return 1; } + if (cond && cond->type() == Item::FUNC_ITEM) + { + Item_func* fitem = (Item_func*) cond; + if ((fitem->functype() == Item_func::EQ_FUNC) && (fitem->argument_count() == 2)) + { + if(fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[1]->const_item()) + { + // WHERE object_id = value + Item_field *item_field = (Item_field*) fitem->arguments()[0]->real_item(); + if (strcasecmp(item_field->field_name, "object_id") == 0) + { + cond_oid = fitem->arguments()[1]->val_int(); + return generate_result(cond_oid, emp, table, thd); + } + } + else if (fitem->arguments()[1]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[0]->const_item()) + { + // WHERE value = object_id + Item_field *item_field = (Item_field*) fitem->arguments()[1]->real_item(); + if (strcasecmp(item_field->field_name, "object_id") == 0) + { + cond_oid = fitem->arguments()[0]->val_int(); + return generate_result(cond_oid, emp, table, thd); + } + } + } + else if (fitem->functype() == Item_func::IN_FUNC) + { + // WHERE object_id in (value1, value2) + Item_field *item_field = (Item_field*) fitem->arguments()[0]->real_item(); + if (strcasecmp(item_field->field_name, "object_id") == 0) + { + for (unsigned int i=1; i < fitem->argument_count(); i++) + { + cond_oid = fitem->arguments()[i]->val_int(); + int result = generate_result(cond_oid, emp, table, thd); + if (result) + return 1; + } + } + } + else if (fitem->functype() == Item_func::UNKNOWN_FUNC && + strcasecmp(fitem->func_name(), "find_in_set") == 0) 
+ { + // WHERE FIND_IN_SET(object_id, values) + String *tmp_var = fitem->arguments()[1]->val_str(); + std::stringstream ss(tmp_var->ptr()); + while (ss >> cond_oid) + { + int ret = generate_result(cond_oid, emp, table, thd); + if (ret) + return 1; + if (ss.peek() == ',') + ss.ignore(); + } + } + } + execplan::ObjectIDManager oidm; BRM::OID_t MaxOID = oidm.size(); for(BRM::OID_t oid = 3000; oid <= MaxOID; oid++) { - emp->getExtents(oid, entries, false, false, true); - if (entries.size() == 0) - continue; - - iter = entries.begin(); - end = entries.end(); - while (iter != end) - { - table->field[0]->store(oid); - if (iter->colWid > 0) - { - table->field[1]->store("Column", strlen("Column"), cs); - if (iter->partition.cprange.lo_val == std::numeric_limits::max() || - iter->partition.cprange.lo_val <= (std::numeric_limits::min() + 2)) - { - table->field[4]->set_null(); - } - else - { - table->field[4]->set_notnull(); - table->field[4]->store(iter->partition.cprange.lo_val); - } - if (iter->partition.cprange.hi_val == std::numeric_limits::max() || - iter->partition.cprange.hi_val <= (std::numeric_limits::min() + 2)) - { - table->field[5]->set_null(); - } - else - { - table->field[5]->set_notnull(); - table->field[5]->store(iter->partition.cprange.hi_val); - } - table->field[6]->store(iter->colWid); - - } - else - { - table->field[1]->store("Dictionary", strlen("Dictionary"), cs); - table->field[4]->set_null(); - table->field[5]->set_null(); - table->field[6]->store(8192); - } - table->field[2]->store(iter->range.start); - table->field[3]->store(iter->range.start + (iter->range.size * 1024) - 1); - - table->field[7]->store(iter->dbRoot); - table->field[8]->store(iter->partitionNum); - table->field[9]->store(iter->segmentNum); - table->field[10]->store(iter->blockOffset); - table->field[11]->store(iter->range.size * 1024); - table->field[12]->store(iter->HWM); - - switch (iter->partition.cprange.isValid) - { - case 0: - table->field[13]->store("Invalid", 
strlen("Invalid"), cs); - break; - case 1: - table->field[13]->store("Updating", strlen("Updating"), cs); - break; - case 2: - table->field[13]->store("Valid", strlen("Valid"), cs); - break; - default: - table->field[13]->store("Unknown", strlen("Unknown"), cs); - break; - } - switch (iter->status) - { - case BRM::EXTENTAVAILABLE: - table->field[14]->store("Available", strlen("Available"), cs); - break; - case BRM::EXTENTUNAVAILABLE: - table->field[14]->store("Unavailable", strlen("Unavailable"), cs); - break; - case BRM::EXTENTOUTOFSERVICE: - table->field[14]->store("Out of service", strlen("Out of service"), cs); - break; - default: - table->field[14]->store("Unknown", strlen("Unknown"), cs); - } - // MCOL-1016: on multiple segments HWM is set to 0 on the lower - // segments, we don't want these to show as 8KB. The down side is - // if the column has less than 1 block it will show as 0 bytes. - // We have no lookahead without it getting messy so this is the - // best compromise. - if (iter->HWM == 0) - { - table->field[15]->store(0); - } - else - { - table->field[15]->store((iter->HWM + 1) * 8192); - } - - if (schema_table_store_record(thd, table)) - { - delete emp; - return 1; - } - - iter++; - - } + int result = generate_result(oid, emp, table, thd); + if (result) + return 1; } - delete emp; return 0; } diff --git a/dbcon/mysql/is_columnstore_files.cpp b/dbcon/mysql/is_columnstore_files.cpp index ce00b8aae..1a5fdad1e 100644 --- a/dbcon/mysql/is_columnstore_files.cpp +++ b/dbcon/mysql/is_columnstore_files.cpp @@ -82,12 +82,10 @@ static bool get_file_sizes(messageqcpp::MessageQueueClient *msgQueueClient, cons } } -static int is_columnstore_files_fill(THD *thd, TABLE_LIST *tables, COND *cond) +static int generate_result(BRM::OID_t oid, BRM::DBRM *emp, TABLE *table, THD *thd) { - BRM::DBRM *emp = new BRM::DBRM(); std::vector entries; CHARSET_INFO *cs = system_charset_info; - TABLE *table = tables->table; char oidDirName[WriteEngine::FILE_NAME_SIZE]; char 
fullFileName[WriteEngine::FILE_NAME_SIZE]; @@ -101,93 +99,168 @@ static int is_columnstore_files_fill(THD *thd, TABLE_LIST *tables, COND *cond) oam::Oam oam_instance; int pmId = 0; + emp->getExtents(oid, entries, false, false, true); + if (entries.size() == 0) + return 0; + + std::vector::const_iterator iter = entries.begin(); + while ( iter != entries.end() ) //organize extents into files + { + // Don't include files more than once at different block offsets + if (iter->blockOffset > 0) + { + iter++; + return 0; + } + + try + { + oam_instance.getDbrootPmConfig(iter->dbRoot, pmId); + } + catch (std::runtime_error) + { + // MCOL-1116: If we are here a DBRoot is offline/missing + iter++; + return 0; + } + table->field[0]->store(oid); + table->field[1]->store(iter->segmentNum); + table->field[2]->store(iter->partitionNum); + + WriteEngine::Convertor::oid2FileName(oid, oidDirName, dbDir, iter->partitionNum, iter->segmentNum); + std::stringstream DbRootName; + DbRootName << "DBRoot" << iter->dbRoot; + std::string DbRootPath = config->getConfig("SystemConfig", DbRootName.str()); + fileSize = compressedFileSize = 0; + snprintf(fullFileName, WriteEngine::FILE_NAME_SIZE, "%s/%s", DbRootPath.c_str(), oidDirName); + + std::ostringstream oss; + oss << "pm" << pmId << "_WriteEngineServer"; + std::string client = oss.str(); + msgQueueClient = messageqcpp::MessageQueueClientPool::getInstance(oss.str()); + + if (!get_file_sizes(msgQueueClient, fullFileName, &fileSize, &compressedFileSize)) + { + messageqcpp::MessageQueueClientPool::releaseInstance(msgQueueClient); + delete emp; + return 1; + } + table->field[3]->store(fullFileName, strlen(fullFileName), cs); + + if (fileSize > 0) + { + table->field[4]->set_notnull(); + table->field[4]->store(fileSize); + if (compressedFileSize > 0) + { + table->field[5]->set_notnull(); + table->field[5]->store(compressedFileSize); + } + else + { + table->field[5]->set_null(); + } + } + else + { + table->field[4]->set_null(); + 
table->field[5]->set_null(); + } + + if (schema_table_store_record(thd, table)) + { + messageqcpp::MessageQueueClientPool::releaseInstance(msgQueueClient); + delete emp; + return 1; + } + iter++; + messageqcpp::MessageQueueClientPool::releaseInstance(msgQueueClient); + msgQueueClient = NULL; + } + return 0; +} + +static int is_columnstore_files_fill(THD *thd, TABLE_LIST *tables, COND *cond) +{ + BRM::DBRM *emp = new BRM::DBRM(); + BRM::OID_t cond_oid = 0; + TABLE *table = tables->table; + if (!emp || !emp->isDBRMReady()) { return 1; } + if (cond && cond->type() == Item::FUNC_ITEM) + { + Item_func* fitem = (Item_func*) cond; + if ((fitem->functype() == Item_func::EQ_FUNC) && (fitem->argument_count() == 2)) + { + if(fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[1]->const_item()) + { + // WHERE object_id = value + Item_field *item_field = (Item_field*) fitem->arguments()[0]->real_item(); + if (strcasecmp(item_field->field_name, "object_id") == 0) + { + cond_oid = fitem->arguments()[1]->val_int(); + return generate_result(cond_oid, emp, table, thd); + } + } + else if (fitem->arguments()[1]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[0]->const_item()) + { + // WHERE value = object_id + Item_field *item_field = (Item_field*) fitem->arguments()[1]->real_item(); + if (strcasecmp(item_field->field_name, "object_id") == 0) + { + cond_oid = fitem->arguments()[0]->val_int(); + return generate_result(cond_oid, emp, table, thd); + } + } + } + else if (fitem->functype() == Item_func::IN_FUNC) + { + // WHERE object_id in (value1, value2) + Item_field *item_field = (Item_field*) fitem->arguments()[0]->real_item(); + if (strcasecmp(item_field->field_name, "object_id") == 0) + { + for (unsigned int i=1; i < fitem->argument_count(); i++) + { + cond_oid = fitem->arguments()[i]->val_int(); + int result = generate_result(cond_oid, emp, table, thd); + if (result) + return 1; + } + } + } + else if (fitem->functype() == 
Item_func::UNKNOWN_FUNC && + strcasecmp(fitem->func_name(), "find_in_set") == 0) + { + // WHERE FIND_IN_SET(object_id, values) + String *tmp_var = fitem->arguments()[1]->val_str(); + std::stringstream ss(tmp_var->ptr()); + while (ss >> cond_oid) + { + int ret = generate_result(cond_oid, emp, table, thd); + if (ret) + return 1; + if (ss.peek() == ',') + ss.ignore(); + } + } + } + execplan::ObjectIDManager oidm; BRM::OID_t MaxOID = oidm.size(); - for(BRM::OID_t oid = 3000; oid <= MaxOID; oid++) + if (!cond_oid) { - emp->getExtents(oid, entries, false, false, true); - if (entries.size() == 0) - continue; - - std::vector::const_iterator iter = entries.begin(); - while ( iter != entries.end() ) //organize extents into files + for(BRM::OID_t oid = 3000; oid <= MaxOID; oid++) { - // Don't include files more than once at different block offsets - if (iter->blockOffset > 0) - { - iter++; - continue; - } - - try - { - oam_instance.getDbrootPmConfig(iter->dbRoot, pmId); - } - catch (std::runtime_error) - { - // MCOL-1116: If we are here a DBRoot is offline/missing - iter++; - continue; - } - table->field[0]->store(oid); - table->field[1]->store(iter->segmentNum); - table->field[2]->store(iter->partitionNum); - - WriteEngine::Convertor::oid2FileName(oid, oidDirName, dbDir, iter->partitionNum, iter->segmentNum); - std::stringstream DbRootName; - DbRootName << "DBRoot" << iter->dbRoot; - std::string DbRootPath = config->getConfig("SystemConfig", DbRootName.str()); - fileSize = compressedFileSize = 0; - snprintf(fullFileName, WriteEngine::FILE_NAME_SIZE, "%s/%s", DbRootPath.c_str(), oidDirName); - - std::ostringstream oss; - oss << "pm" << pmId << "_WriteEngineServer"; - std::string client = oss.str(); - msgQueueClient = messageqcpp::MessageQueueClientPool::getInstance(oss.str()); - - if (!get_file_sizes(msgQueueClient, fullFileName, &fileSize, &compressedFileSize)) - { - messageqcpp::MessageQueueClientPool::releaseInstance(msgQueueClient); - delete emp; + int result = 
generate_result(oid, emp, table, thd); + if (result) return 1; - } - table->field[3]->store(fullFileName, strlen(fullFileName), cs); - - if (fileSize > 0) - { - table->field[4]->set_notnull(); - table->field[4]->store(fileSize); - if (compressedFileSize > 0) - { - table->field[5]->set_notnull(); - table->field[5]->store(compressedFileSize); - } - else - { - table->field[5]->set_null(); - } - } - else - { - table->field[4]->set_null(); - table->field[5]->set_null(); - } - - if (schema_table_store_record(thd, table)) - { - messageqcpp::MessageQueueClientPool::releaseInstance(msgQueueClient); - delete emp; - return 1; - } - iter++; - messageqcpp::MessageQueueClientPool::releaseInstance(msgQueueClient); - msgQueueClient = NULL; } } delete emp; diff --git a/dbcon/mysql/is_columnstore_tables.cpp b/dbcon/mysql/is_columnstore_tables.cpp index 47ce4970c..baa894487 100644 --- a/dbcon/mysql/is_columnstore_tables.cpp +++ b/dbcon/mysql/is_columnstore_tables.cpp @@ -42,22 +42,91 @@ ST_FIELD_INFO is_columnstore_tables_fields[] = {0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0} }; +static void get_cond_item(Item_func *item, String **table, String **db) +{ + char tmp_char[MAX_FIELD_WIDTH]; + Item_field *item_field = (Item_field*) item->arguments()[0]->real_item(); + if (strcasecmp(item_field->field_name, "table_name") == 0) + { + String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); + *table = item->arguments()[1]->val_str(&str_buf); + return; + } + else if (strcasecmp(item_field->field_name, "table_schema") == 0) + { + String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); + *db = item->arguments()[1]->val_str(&str_buf); + return; + } +} + +static void get_cond_items(COND *cond, String **table, String **db) +{ + if (cond->type() == Item::FUNC_ITEM) + { + Item_func* fitem = (Item_func*) cond; + if (fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[1]->const_item()) + { + get_cond_item(fitem, table, db); + } + } + else if ((cond->type() 
== Item::COND_ITEM) && (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)) + { + List_iterator li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item= li++)) + { + if (item->type() == Item::FUNC_ITEM) + { + get_cond_item((Item_func*)item, table, db); + } + else + { + get_cond_items(item, table, db); + } + } + } +} + static int is_columnstore_tables_fill(THD *thd, TABLE_LIST *tables, COND *cond) { CHARSET_INFO *cs = system_charset_info; TABLE *table = tables->table; + String *table_name = NULL; + String *db_name = NULL; boost::shared_ptr systemCatalogPtr = execplan::CalpontSystemCatalog::makeCalpontSystemCatalog(execplan::CalpontSystemCatalog::idb_tid2sid(thd->thread_id)); systemCatalogPtr->identity(execplan::CalpontSystemCatalog::FE); + if (cond) + { + get_cond_items(cond, &table_name, &db_name); + } + const std::vector< std::pair > catalog_tables = systemCatalogPtr->getTables(); for (std::vector >::const_iterator it = catalog_tables.begin(); it != catalog_tables.end(); ++it) { + if (db_name) + { + if ((*it).second.schema.compare(db_name->ptr()) != 0) + { + continue; + } + } + if (table_name) + { + if ((*it).second.table.compare(table_name->ptr()) != 0) + { + continue; + } + } + execplan::CalpontSystemCatalog::TableInfo tb_info = systemCatalogPtr->tableInfo((*it).second); std::string create_date = dataconvert::DataConvert::dateToString((*it).second.create_date); table->field[0]->store((*it).second.schema.c_str(), (*it).second.schema.length(), cs); From b6941d5ba30e1e412419dbfe79f14291298d2c64 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Wed, 20 Jun 2018 21:04:55 +0300 Subject: [PATCH 053/123] MCOL-1456 GROUP BY handler doesn`t process queries in autoswitch mode. 
--- dbcon/mysql/ha_calpont.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbcon/mysql/ha_calpont.cpp b/dbcon/mysql/ha_calpont.cpp index 35953fc34..79efdb88e 100644 --- a/dbcon/mysql/ha_calpont.cpp +++ b/dbcon/mysql/ha_calpont.cpp @@ -1156,7 +1156,8 @@ create_calpont_group_by_handler(THD* thd, Query* query) { ha_calpont_group_by_handler* handler = NULL; - if ( thd->infinidb_vtable.vtable_state == THD::INFINIDB_DISABLE_VTABLE ) + if ( thd->infinidb_vtable.vtable_state == THD::INFINIDB_DISABLE_VTABLE + && thd->variables.infinidb_vtable_mode == 0) { handler = new ha_calpont_group_by_handler(thd, query); From f650db2d1032a437f2a9fa4c69b55539f4c1d5f2 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Thu, 21 Jun 2018 14:15:01 +0300 Subject: [PATCH 054/123] MCOL-1457 GROUP BY handler supports aliases of projected columns in ORDER BY list. --- dbcon/mysql/ha_calpont_execplan.cpp | 34 +++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 8df06c6b4..ca862c938 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -8089,9 +8089,9 @@ int cp_get_group_plan(THD* thd, SCSEP& csep, cal_impl_if::cal_group_info& gi) gwi.thd = thd; int status = getGroupPlan(gwi, select_lex, csep, gi); -// cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; -// cerr << *csep << endl ; -// cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; + cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; + cerr << *csep << endl ; + cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; if (status > 0) return ER_INTERNAL_ERROR; @@ -9498,6 +9498,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro else { Item* ord_item = *(ordercol->item); + bool nonAggField = true; // ignore not_used column on order by. 
if (ord_item->type() == Item::INT_ITEM && ord_item->full_name() && string(ord_item->full_name()) == "Not_used") @@ -9506,11 +9507,36 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro rc = gwi.returnedCols[((Item_int*)ord_item)->val_int() - 1]->clone(); else if (ord_item->type() == Item::SUBSELECT_ITEM) gwi.fatalParseError = true; + else if (ordercol->in_field_list && ord_item->type() == Item::FIELD_ITEM) + { + rc = buildReturnedColumn(ord_item, gwi, gwi.fatalParseError); + Item_field* ifp = static_cast(ord_item); + + // The item must be an alias for a projected column + // and extended SELECT list must contain a proper rc + // either aggregation or a field. + if (!rc && ifp->name_length) + { + gwi.fatalParseError = false; + execplan::CalpontSelectExecutionPlan::ReturnedColumnList::iterator iter = gwi.returnedCols.begin(); + AggregateColumn* ac = NULL; + + for ( ; iter != gwi.returnedCols.end(); iter++ ) + { + if ( (*iter).get()->alias() == ord_item->name ) + { + rc = (*iter).get()->clone(); + nonAggField = rc->hasAggregate() ? false : true; + break; + } + } + } + } else rc = buildReturnedColumn(ord_item, gwi, gwi.fatalParseError); // Looking for a match for this item in GROUP BY list. 
- if ( rc && ord_item->type() == Item::FIELD_ITEM ) + if ( rc && ord_item->type() == Item::FIELD_ITEM && nonAggField) { execplan::CalpontSelectExecutionPlan::ReturnedColumnList::iterator iter = gwi.groupByCols.begin(); From 8c90419852513390cabed62650ff77b8904c1001 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 22 Jun 2018 15:31:31 +0100 Subject: [PATCH 055/123] Fix merge and coding style issues --- dbcon/execplan/aggregatecolumn.cpp | 14 +- dbcon/execplan/aggregatecolumn.h | 6 +- dbcon/joblist/joblistfactory.cpp | 62 +- dbcon/joblist/tupleaggregatestep.cpp | 83 ++- dbcon/joblist/windowfunctionstep.cpp | 1 + dbcon/mysql/ha_calpont_dml.cpp | 1 + dbcon/mysql/ha_calpont_execplan.cpp | 753 ++++++++++---------- dbcon/mysql/ha_calpont_impl.cpp | 3 +- dbcon/mysql/ha_window_function.cpp | 2 +- dbcon/mysql/is_columnstore_columns.cpp | 20 +- dbcon/mysql/is_columnstore_extents.cpp | 44 +- dbcon/mysql/is_columnstore_files.cpp | 38 +- dbcon/mysql/is_columnstore_tables.cpp | 20 +- oam/oamcpp/liboamcpp.cpp | 322 +++++---- oam/oamcpp/liboamcpp.h | 6 +- primitives/primproc/primitiveserver.cpp | 108 +-- procmgr/main.cpp | 38 +- procmgr/processmanager.cpp | 14 +- procmon/main.cpp | 10 +- procmon/processmonitor.cpp | 17 +- utils/dataconvert/dataconvert.cpp | 2 + utils/funcexp/func_date.cpp | 2 +- utils/funcexp/func_day.cpp | 2 +- utils/funcexp/func_dayname.cpp | 4 +- utils/funcexp/func_dayofweek.cpp | 2 +- utils/funcexp/func_dayofyear.cpp | 2 +- utils/funcexp/func_last_day.cpp | 2 +- utils/funcexp/func_month.cpp | 2 +- utils/funcexp/func_monthname.cpp | 4 +- utils/funcexp/func_quarter.cpp | 2 +- utils/funcexp/func_to_days.cpp | 2 +- utils/funcexp/func_week.cpp | 2 +- utils/funcexp/func_weekday.cpp | 2 +- utils/funcexp/func_year.cpp | 2 +- utils/funcexp/func_yearweek.cpp | 2 +- utils/libmysql_client/libmysql_client.cpp | 13 +- utils/libmysql_client/libmysql_client.h | 10 +- utils/rowgroup/rowaggregation.cpp | 58 +- utils/rowgroup/rowaggregation.h | 10 +- 
utils/threadpool/prioritythreadpool.cpp | 66 +- utils/udfsdk/avgx.cpp | 2 + utils/udfsdk/mcsv1_udaf.cpp | 2 +- utils/udfsdk/regr_avgx.cpp | 4 + utils/udfsdk/udfmysql.cpp | 170 ++--- utils/windowfunction/wf_udaf.cpp | 43 +- utils/windowfunction/wf_udaf.h | 2 +- utils/windowfunction/windowfunctiontype.cpp | 1 + utils/windowfunction/windowfunctiontype.h | 2 +- writeengine/wrapper/writeengine.cpp | 144 ++-- 49 files changed, 1198 insertions(+), 925 deletions(-) diff --git a/dbcon/execplan/aggregatecolumn.cpp b/dbcon/execplan/aggregatecolumn.cpp index 5bce12d79..c996dad17 100644 --- a/dbcon/execplan/aggregatecolumn.cpp +++ b/dbcon/execplan/aggregatecolumn.cpp @@ -107,8 +107,8 @@ AggregateColumn::AggregateColumn(const string& functionName, const string& conte fData(functionName + "(" + content + ")") { // TODO: need to handle distinct - SRCP srcp(new ArithmeticColumn(content)); - fAggParms.push_back(srcp); + SRCP srcp(new ArithmeticColumn(content)); + fAggParms.push_back(srcp); } AggregateColumn::AggregateColumn( const AggregateColumn& rhs, const uint32_t sessionID ): @@ -144,6 +144,7 @@ const string AggregateColumn::toString() const { output << *(fAggParms[i]) << " "; } + output << endl; if (fConstCol) @@ -167,6 +168,7 @@ void AggregateColumn::serialize(messageqcpp::ByteStream& b) const b << static_cast(fAggOp); b << static_cast(fAggParms.size()); + for (uint32_t i = 0; i < fAggParms.size(); ++i) { fAggParms[i]->serialize(b); @@ -208,6 +210,7 @@ void AggregateColumn::unserialize(messageqcpp::ByteStream& b) b >> fAggOp; b >> size; + for (i = 0; i < size; i++) { rc = dynamic_cast(ObjectReader::createTreeNode(b)); @@ -264,9 +267,10 @@ bool AggregateColumn::operator==(const AggregateColumn& t) const { return false; } - for (it = fAggParms.begin(), it2 = t.fAggParms.begin(); - it != fAggParms.end(); - ++it, ++it2) + + for (it = fAggParms.begin(), it2 = t.fAggParms.begin(); + it != fAggParms.end(); + ++it, ++it2) { if (**it != **it2) return false; diff --git 
a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h index b0884f179..07bbab0b6 100644 --- a/dbcon/execplan/aggregatecolumn.h +++ b/dbcon/execplan/aggregatecolumn.h @@ -163,7 +163,7 @@ public: fAggParms = parms; } - + /** return a copy of this pointer * * deep copy of this pointer and return the copy @@ -316,8 +316,8 @@ protected: uint8_t fAggOp; /** - * ReturnedColumn objects that are the arguments to this - * function + * ReturnedColumn objects that are the arguments to this + * function */ AggParms fAggParms; diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 033bf2643..6fa0adbab 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -896,36 +896,44 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo continue; } + #if 0 // MCOL-1201 Add support for multi-parameter UDAnF - UDAFColumn* udafc = dynamic_cast(retCols[i].get()); - if (udafc != NULL) - { - srcp = udafc->aggParms()[0]; - const RowColumn* rcp = dynamic_cast(srcp.get()); + UDAFColumn* udafc = dynamic_cast(retCols[i].get()); - const vector& cols = rcp->columnVec(); - for (vector::const_iterator j = cols.begin(); j != cols.end(); j++) - { + if (udafc != NULL) + { + srcp = udafc->aggParms()[0]; + const RowColumn* rcp = dynamic_cast(srcp.get()); + + const vector& cols = rcp->columnVec(); + + for (vector::const_iterator j = cols.begin(); j != cols.end(); j++) + { srcp = *j; - if (dynamic_cast(srcp.get()) == NULL) - retCols.push_back(srcp); + + if (dynamic_cast(srcp.get()) == NULL) + retCols.push_back(srcp); // Do we need this? - const ArithmeticColumn* ac = dynamic_cast(srcp.get()); - const FunctionColumn* fc = dynamic_cast(srcp.get()); - if (ac != NULL || fc != NULL) - { - // bug 3728, make a dummy expression step for each expression. 
- scoped_ptr es(new ExpressionStep(jobInfo)); - es->expression(srcp, jobInfo); - } - } - continue; - } + const ArithmeticColumn* ac = dynamic_cast(srcp.get()); + const FunctionColumn* fc = dynamic_cast(srcp.get()); + + if (ac != NULL || fc != NULL) + { + // bug 3728, make a dummy expression step for each expression. + scoped_ptr es(new ExpressionStep(jobInfo)); + es->expression(srcp, jobInfo); + } + } + + continue; + } + #endif srcp = retCols[i]; const AggregateColumn* ag = dynamic_cast(retCols[i].get()); + // bug 3728 Make a dummy expression for srcp if it is an // expression. This is needed to fill in some stuff. // Note that es.expression does nothing if the item is not an expression. @@ -937,7 +945,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo } else { - // MCOL-1201 multi-argument aggregate. make a dummy expression + // MCOL-1201 multi-argument aggregate. make a dummy expression // step for each argument that is an expression. for (uint32_t i = 0; i < ag->aggParms().size(); ++i) { @@ -1017,7 +1025,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo // replace the aggregate on constant with a count(*) SRCP clone; UDAFColumn* udafc = dynamic_cast(aggc); - + if (udafc) { clone.reset(new UDAFColumn(*udafc, aggc->sessionID())); @@ -1026,14 +1034,15 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo { clone.reset(new AggregateColumn(*aggc, aggc->sessionID())); } - + jobInfo.constAggregate.insert(make_pair(i, clone)); aggc->aggOp(AggregateColumn::COUNT_ASTERISK); aggc->distinct(false); } - + srcp = aggParms[parm]; sc = dynamic_cast(srcp.get()); + if (parm == 0) { op = aggc->aggOp(); @@ -1042,7 +1051,9 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo { op = AggregateColumn::MULTI_PARM; } + doDistinct = aggc->distinct(); + if (aggParms.size() == 1) { // Set the col type based on the single parm. 
@@ -1050,6 +1061,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo // doesn't really make sense. updateAggregateColType(aggc, srcp, op, jobInfo); } + aggCt = aggc->resultType(); // As of bug3695, make sure varbinary is not used in aggregation. diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 0f981e68f..da91919f0 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -852,6 +852,7 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) if (ac->aggOp() == ROWAGG_UDAF) { UDAFColumn* udafc = dynamic_cast(ac); + if (udafc) { constAggDataVec.push_back( @@ -1295,10 +1296,12 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; + for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; + if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -1307,6 +1310,7 @@ void TupleAggregateStep::prep1PhaseAggregate( break; } } + if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -1488,10 +1492,12 @@ void TupleAggregateStep::prep1PhaseAggregate( // If the first param is const udafcParamIdx = 0; ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; } + ++udafcParamIdx; break; } @@ -1504,10 +1510,12 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(width[colProj]); + // If the param is const if (udafc) { ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; @@ -1517,6 +1525,7 @@ void 
TupleAggregateStep::prep1PhaseAggregate( { throw QueryDataExcept("prep1PhaseAggregate: UDAF multi function with no parms", aggregateFuncErr); } + ++udafcParamIdx; } break; @@ -1892,6 +1901,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( break; } } + if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -2111,10 +2121,12 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // If the first param is const udafcParamIdx = 0; ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; } + ++udafcParamIdx; break; } @@ -2129,10 +2141,12 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( widthAgg.push_back(widthProj[colProj]); multiParmIndexes.push_back(colAgg); ++colAgg; + // If the param is const if (udafc) { ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; @@ -2142,6 +2156,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( { throw QueryDataExcept("prep1PhaseDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); } + ++udafcParamIdx; } break; @@ -2193,9 +2208,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( groupByNoDist.push_back(groupby); aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - + // locate the return column position in aggregated rowgroup uint64_t outIdx = 0; + for (uint64_t i = 0; i < returnedColVec.size(); i++) { udafc = NULL; @@ -2240,16 +2256,19 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; + for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; + if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); break; } } + if (it == 
jobInfo.projectionCols.end()) { throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -2546,6 +2565,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( { // update the aggregate function vector SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) { funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, outIdx)); @@ -2589,6 +2609,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i @@ -2839,6 +2860,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( ++multiParms; continue; } + if (returnedColVec[k].first != distinctColKey) continue; @@ -2859,7 +2881,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex-multiParms)); + f->fAuxColumnIndex - multiParms)); functionSub2.push_back(funct); } } @@ -2887,6 +2909,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( ++multiParms; continue; } + // search non-distinct functions in functionVec vector::iterator it = functionVec2.begin(); @@ -2902,7 +2925,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex-multiParms)); + udafFuncCol->fAuxColumnIndex - multiParms)); functionSub2.push_back(funct); } else if ((f->fOutputColumnIndex == k) && @@ -2924,7 +2947,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex-multiParms)); + f->fAuxColumnIndex - multiParms)); functionSub2.push_back(funct); } } @@ -3160,10 +3183,12 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; + for 
(; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; + if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3172,6 +3197,7 @@ void TupleAggregateStep::prep2PhasesAggregate( break; } } + if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -3394,10 +3420,12 @@ void TupleAggregateStep::prep2PhasesAggregate( // If the first param is const udafcParamIdx = 0; ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; } + ++udafcParamIdx; break; } @@ -3411,10 +3439,12 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); colAggPm++; + // If the param is const if (udafc) { ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; @@ -3424,6 +3454,7 @@ void TupleAggregateStep::prep2PhasesAggregate( { throw QueryDataExcept("prep2PhasesAggregate: UDAF multi function with no parms", aggregateFuncErr); } + ++udafcParamIdx; } break; @@ -3451,6 +3482,7 @@ void TupleAggregateStep::prep2PhasesAggregate( AGG_MAP aggDupFuncMap; projColsUDAFIdx = 0; + // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3462,6 +3494,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // locate the return column position in aggregated rowgroup from PM // outIdx is i without the multi-columns, uint64_t outIdx = 0; + for (uint64_t i = 0; i < returnedColVec.size(); i++) { uint32_t retKey = returnedColVec[i].first; @@ -3478,6 +3511,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // Is this a UDAF? use the function as part of the key. 
pUDAFFunc = NULL; udafc = NULL; + if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; @@ -3486,12 +3520,14 @@ void TupleAggregateStep::prep2PhasesAggregate( { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; + if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); break; } } + if (it == jobInfo.projectionCols.end()) { throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -3644,6 +3680,7 @@ void TupleAggregateStep::prep2PhasesAggregate( { // update the aggregate function vector SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) { funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, outIdx)); @@ -3685,6 +3722,7 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnedColVec[i].second == AggregateColumn::AVG) avgFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } @@ -4029,10 +4067,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; + for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; + if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -4041,6 +4081,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } } + if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -4259,10 +4300,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // If the first param is const udafcParamIdx = 0; ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; } + ++udafcParamIdx; break; } @@ -4277,10 +4320,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( widthAggPm.push_back(width[colProj]); multiParmIndexes.push_back(colAggPm); colAggPm++; 
+ // If the param is const if (udafc) { ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); + if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; @@ -4290,6 +4335,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { throw QueryDataExcept("prep2PhasesDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); } + ++udafcParamIdx; } break; @@ -4332,15 +4378,17 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); + if (!udafFuncCol) { - throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } + funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fOutputColumnIndex-multiParms, - udafFuncCol->fAuxColumnIndex-multiParms)); + udafFuncCol->fOutputColumnIndex - multiParms, + udafFuncCol->fAuxColumnIndex - multiParms)); functionNoDistVec.push_back(funct); pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); } @@ -4350,8 +4398,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fAggFunction, funcPm->fStatsFunction, funcPm->fOutputColumnIndex, - funcPm->fOutputColumnIndex-multiParms, - funcPm->fAuxColumnIndex-multiParms)); + funcPm->fOutputColumnIndex - multiParms, + funcPm->fAuxColumnIndex - multiParms)); functionNoDistVec.push_back(funct); pUDAFFunc = NULL; } @@ -4364,6 +4412,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { continue; } + oidsAggUm.push_back(oidsAggPm[idx]); keysAggUm.push_back(keysAggPm[idx]); scaleAggUm.push_back(scaleAggPm[idx]); @@ -4400,6 +4449,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // locate the return column position in aggregated rowgroup from PM // outIdx is i without the multi-columns, uint64_t outIdx = 
0; + for (uint64_t i = 0; i < returnedColVec.size(); i++) { pUDAFFunc = NULL; @@ -4420,16 +4470,19 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; + for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; + if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); break; } } + if (it == jobInfo.projectionCols.end()) { throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -4606,6 +4659,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(retKey); scaleAggDist.push_back(0); + if (isUnsigned(typeAggUm[colUm])) { precisionAggDist.push_back(20); @@ -4616,6 +4670,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( precisionAggDist.push_back(19); typeAggDist.push_back(CalpontSystemCatalog::BIGINT); } + widthAggDist.push_back(bigIntWidth); } } @@ -4702,6 +4757,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { // update the aggregate function vector SP_ROWAGG_FUNC_t funct; + if (aggOp == ROWAGG_UDAF) { funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, outIdx)); @@ -4745,6 +4801,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } + ++outIdx; } // for (i @@ -4987,6 +5044,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( ++multiParms; continue; } + if (returnedColVec[k].first != distinctColKey) continue; @@ -5008,7 +5066,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex-multiParms)); + f->fAuxColumnIndex - multiParms)); functionSub2.push_back(funct); } } @@ -5034,6 +5092,7 @@ void 
TupleAggregateStep::prep2PhasesDistinctAggregate( ++multiParms; continue; } + // search non-distinct functions in functionVec vector::iterator it = functionVecUm.begin(); @@ -5051,7 +5110,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex-multiParms)); + udafFuncCol->fAuxColumnIndex - multiParms)); functionSub2.push_back(funct); } else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK || @@ -5072,7 +5131,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex-multiParms)); + f->fAuxColumnIndex - multiParms)); functionSub2.push_back(funct); } } diff --git a/dbcon/joblist/windowfunctionstep.cpp b/dbcon/joblist/windowfunctionstep.cpp index 2a93f680b..823b2bd04 100644 --- a/dbcon/joblist/windowfunctionstep.cpp +++ b/dbcon/joblist/windowfunctionstep.cpp @@ -655,6 +655,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) // make sure index is in range else if (fields.size() > 1 && fields[1] >= 0 && static_cast(fields[1]) < types.size()) ct = types[fields[1]]; + // workaround for functions using "within group (order by)" syntax string fn = boost::to_upper_copy(wc->functionName()); diff --git a/dbcon/mysql/ha_calpont_dml.cpp b/dbcon/mysql/ha_calpont_dml.cpp index a9b64b757..67aab9721 100644 --- a/dbcon/mysql/ha_calpont_dml.cpp +++ b/dbcon/mysql/ha_calpont_dml.cpp @@ -899,6 +899,7 @@ int ha_calpont_impl_write_batch_row_(uchar* buf, TABLE* table, cal_impl_if::cal_ { fprintf(ci.filePtr, "-"); } + if (!ltime.second_part) { fprintf(ci.filePtr, "%02d:%02d:%02d%c", diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 8df06c6b4..4cedeecb6 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4039,8 +4039,8 @@ ParseTree* buildParseTree(Item_func* item, 
gp_walk_info& gwi, bool& nonSupport) ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { // MCOL-1201 For UDAnF multiple parameters - vector selCols; - vector orderCols; + vector selCols; + vector orderCols; if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -4059,6 +4059,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // N.B. argument_count() is the # of formal parms to the agg fcn. InifniDB only supports 1 argument // TODO: Support more than one parm #if 0 + if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC && isp->sum_func() != Item_sum::UDF_SUM_FUNC) { @@ -4066,6 +4067,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG); return NULL; } + #endif AggregateColumn* ac = NULL; @@ -4089,446 +4091,459 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { gwi.fatalParseError = true; gwi.parseErrorText = "Non supported aggregate type on the select clause"; + if (ac) delete ac; + return NULL; } try { - // special parsing for group_concat - if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - Item_func_group_concat* gc = (Item_func_group_concat*)isp; - vector orderCols; - RowColumn* rowCol = new RowColumn(); - vector selCols; - - uint32_t select_ctn = gc->count_field(); - ReturnedColumn* rc = NULL; - - for (uint32_t i = 0; i < select_ctn; i++) + // special parsing for group_concat + if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) { - rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); + Item_func_group_concat* gc = (Item_func_group_concat*)isp; + vector orderCols; + RowColumn* rowCol = new RowColumn(); + vector selCols; - if (!rc || gwi.fatalParseError) - { - if (ac) - delete ac; - return NULL; - } + uint32_t select_ctn = gc->count_field(); + ReturnedColumn* rc = NULL; - 
selCols.push_back(SRCP(rc)); - } - - ORDER** order_item, **end; - - for (order_item = gc->get_order(), - end = order_item + gc->order_field(); order_item < end; - order_item++) - { - Item* ord_col = *(*order_item)->item; - - if (ord_col->type() == Item::INT_ITEM) + for (uint32_t i = 0; i < select_ctn; i++) { - Item_int* id = (Item_int*)ord_col; - - if (id->val_int() > (int)selCols.size()) - { - gwi.fatalParseError = true; - if (ac) - delete ac; - return NULL; - } - - rc = selCols[id->val_int() - 1]->clone(); - rc->orderPos(id->val_int() - 1); - } - else - { - rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); + rc = buildReturnedColumn(sfitempp[i], gwi, gwi.fatalParseError); if (!rc || gwi.fatalParseError) { - if (ac) - delete ac; + if (ac) + delete ac; + return NULL; } + + selCols.push_back(SRCP(rc)); } - // 10.2 TODO: direction is now a tri-state flag - rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); - orderCols.push_back(SRCP(rc)); - } + ORDER** order_item, **end; - rowCol->columnVec(selCols); - (dynamic_cast(ac))->orderCols(orderCols); - parm.reset(rowCol); - ac->aggParms().push_back(parm); - - if (gc->str_separator()) - { - string separator; - separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); - (dynamic_cast(ac))->separator(separator); - } - } - else - { - for (uint32_t i = 0; i < isp->argument_count(); i++) - { - Item* sfitemp = sfitempp[i]; - Item::Type sfitype = sfitemp->type(); - - switch (sfitype) + for (order_item = gc->get_order(), + end = order_item + gc->order_field(); order_item < end; + order_item++) { - case Item::FIELD_ITEM: - { - Item_field* ifp = reinterpret_cast(sfitemp); - SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + Item* ord_col = *(*order_item)->item; - if (!sc) + if (ord_col->type() == Item::INT_ITEM) + { + Item_int* id = (Item_int*)ord_col; + + if (id->val_int() > (int)selCols.size()) { gwi.fatalParseError = true; + + if (ac) + delete ac; + + return NULL; + } + + rc = 
selCols[id->val_int() - 1]->clone(); + rc->orderPos(id->val_int() - 1); + } + else + { + rc = buildReturnedColumn(ord_col, gwi, gwi.fatalParseError); + + if (!rc || gwi.fatalParseError) + { + if (ac) + delete ac; + + return NULL; + } + } + + // 10.2 TODO: direction is now a tri-state flag + rc->asc((*order_item)->direction == ORDER::ORDER_ASC ? true : false); + orderCols.push_back(SRCP(rc)); + } + + rowCol->columnVec(selCols); + (dynamic_cast(ac))->orderCols(orderCols); + parm.reset(rowCol); + ac->aggParms().push_back(parm); + + if (gc->str_separator()) + { + string separator; + separator.assign(gc->str_separator()->ptr(), gc->str_separator()->length()); + (dynamic_cast(ac))->separator(separator); + } + } + else + { + for (uint32_t i = 0; i < isp->argument_count(); i++) + { + Item* sfitemp = sfitempp[i]; + Item::Type sfitype = sfitemp->type(); + + switch (sfitype) + { + case Item::FIELD_ITEM: + { + Item_field* ifp = reinterpret_cast(sfitemp); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi); + + if (!sc) + { + gwi.fatalParseError = true; + break; + } + + parm.reset(sc); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); + TABLE_LIST* tmp = (ifp->cached_table ? ifp->cached_table : 0); + gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); break; } - parm.reset(sc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); - TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); - gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); - break; - } + case Item::INT_ITEM: + case Item::STRING_ITEM: + case Item::REAL_ITEM: + case Item::DECIMAL_ITEM: + { + // treat as count(*) + if (ac->aggOp() == AggregateColumn::COUNT) + ac->aggOp(AggregateColumn::COUNT_ASTERISK); - case Item::INT_ITEM: - case Item::STRING_ITEM: - case Item::REAL_ITEM: - case Item::DECIMAL_ITEM: - { - // treat as count(*) - if (ac->aggOp() == AggregateColumn::COUNT) - ac->aggOp(AggregateColumn::COUNT_ASTERISK); parm.reset(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError)); ac->constCol(parm); - break; - } - - case Item::NULL_ITEM: - { - parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); - ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); - break; - } - - case Item::FUNC_ITEM: - { - Item_func* ifp = (Item_func*)sfitemp; - ReturnedColumn* rc = 0; - - // check count(1+1) case - vector tmpVec; - uint16_t parseInfo = 0; - parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); - - if (parseInfo & SUB_BIT) - { - gwi.fatalParseError = true; break; } - else if (!gwi.fatalParseError && - !(parseInfo & AGG_BIT) && - !(parseInfo & AF_BIT) && - tmpVec.size() == 0) + + case Item::NULL_ITEM: { - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - FunctionColumn* fc = dynamic_cast(rc); + parm.reset(new ConstantColumn("", ConstantColumn::NULLDATA)); + ac->constCol(SRCP(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError))); + break; + } - if ((fc && fc->functionParms().empty()) || !fc) + case Item::FUNC_ITEM: + { + Item_func* ifp = (Item_func*)sfitemp; + ReturnedColumn* rc = 0; + + // check count(1+1) case + vector tmpVec; + uint16_t parseInfo = 0; + parse_item(ifp, tmpVec, gwi.fatalParseError, parseInfo); + + if (parseInfo & SUB_BIT) { - //ac->aggOp(AggregateColumn::COUNT_ASTERISK); - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, 
gwi.fatalParseError); + gwi.fatalParseError = true; + break; + } + else if (!gwi.fatalParseError && + !(parseInfo & AGG_BIT) && + !(parseInfo & AF_BIT) && + tmpVec.size() == 0) + { + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + FunctionColumn* fc = dynamic_cast(rc); - if (dynamic_cast(rc)) + if ((fc && fc->functionParms().empty()) || !fc) { - //@bug5229. handle constant function on aggregate argument - ac->constCol(SRCP(rc)); - break; + //ac->aggOp(AggregateColumn::COUNT_ASTERISK); + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (dynamic_cast(rc)) + { + //@bug5229. handle constant function on aggregate argument + ac->constCol(SRCP(rc)); + break; + } } } + + // MySQL carelessly allows correlated aggregate function on the WHERE clause. + // Here is the work around to deal with that inconsistence. + // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; + ClauseType clauseType = gwi.clauseType; + + if (gwi.clauseType == WHERE) + gwi.clauseType = HAVING; + + // @bug 3603. for cases like max(rand()). try to build function first. + if (!rc) + rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); + + parm.reset(rc); + gwi.clauseType = clauseType; + + if (gwi.fatalParseError) + break; + + break; + } + + case Item::REF_ITEM: + { + ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); + + if (rc) + { + parm.reset(rc); + break; + } } - // MySQL carelessly allows correlated aggregate function on the WHERE clause. - // Here is the work around to deal with that inconsistence. - // e.g., SELECT (SELECT t.c FROM t1 AS t WHERE t.b=MAX(t1.b + 0)) FROM t1; - ClauseType clauseType = gwi.clauseType; - - if (gwi.clauseType == WHERE) - gwi.clauseType = HAVING; - - // @bug 3603. for cases like max(rand()). try to build function first. 
- if (!rc) - rc = buildFunctionColumn(ifp, gwi, gwi.fatalParseError); - - parm.reset(rc); - gwi.clauseType = clauseType; - - if (gwi.fatalParseError) - break; - - break; - } - - case Item::REF_ITEM: - { - ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError); - - if (rc) + default: { - parm.reset(rc); - break; + gwi.fatalParseError = true; + //gwi.parseErrorText = "Non-supported Item in Aggregate function"; } } - default: + if (gwi.fatalParseError) { - gwi.fatalParseError = true; - //gwi.parseErrorText = "Non-supported Item in Aggregate function"; - } - } + if (gwi.parseErrorText.empty()) + { + Message::Args args; - if (gwi.fatalParseError) - { - if (gwi.parseErrorText.empty()) - { - Message::Args args; + if (item->name) + args.add(item->name); + else + args.add(""); - if (item->name) - args.add(item->name); - else - args.add(""); + gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); + } - gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_AGG_ARGS, args); + if (ac) + delete ac; + + return NULL; } - if (ac) - delete ac; - return NULL; - } - if (parm) - { - // MCOL-1201 multi-argument aggregate - ac->aggParms().push_back(parm); + if (parm) + { + // MCOL-1201 multi-argument aggregate + ac->aggParms().push_back(parm); + } } } - } // Get result type // Modified for MCOL-1201 multi-argument aggregate if (ac->aggParms().size() > 0) - { + { // These are all one parm functions, so we can safely // use the first parm for result type. 
parm = ac->aggParms()[0]; - if (isp->sum_func() == Item_sum::AVG_FUNC || - isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - switch (ct.colDataType) + if (isp->sum_func() == Item_sum::AVG_FUNC || + isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC) { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::DECIMAL; - ct.colWidth = 8; - ct.scale += 4; - break; + CalpontSystemCatalog::ColType ct = parm->resultType(); + + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::DECIMAL; + ct.colWidth = 8; + ct.scale += 4; + break; #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; #endif - default: - 
break; + default: + break; + } + + ac->resultType(ct); } - - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::COUNT_FUNC || - isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = parm->resultType().scale; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_FUNC || - isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) - { - CalpontSystemCatalog::ColType ct = parm->resultType(); - - switch (ct.colDataType) + else if (isp->sum_func() == Item_sum::COUNT_FUNC || + isp->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - ct.colDataType = CalpontSystemCatalog::BIGINT; + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = parm->resultType().scale; + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::SUM_FUNC || + isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC) + { + CalpontSystemCatalog::ColType ct = parm->resultType(); - // no break, let fall through + switch (ct.colDataType) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + ct.colDataType = CalpontSystemCatalog::BIGINT; - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - ct.colWidth = 8; - break; + // no break, let fall through - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - ct.colDataType = CalpontSystemCatalog::UBIGINT; - ct.colWidth = 8; - break; + case CalpontSystemCatalog::DECIMAL: + case 
CalpontSystemCatalog::UDECIMAL: + ct.colWidth = 8; + break; + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + ct.colDataType = CalpontSystemCatalog::UBIGINT; + ct.colWidth = 8; + break; #if PROMOTE_FLOAT_TO_DOUBLE_ON_SUM - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - break; + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + break; #endif - default: - break; - } + default: + break; + } - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::STD_FUNC || - isp->sum_func() == Item_sum::VARIANCE_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::DOUBLE; - ct.colWidth = 8; - ct.scale = 0; - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::SUM_BIT_FUNC) - { - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::BIGINT; - ct.colWidth = 8; - ct.scale = 0; - ct.precision = -16; // borrowed to indicate skip null value check on connector - ac->resultType(ct); - } - else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) - { - //Item_func_group_concat* gc = (Item_func_group_concat*)isp; - CalpontSystemCatalog::ColType ct; - ct.colDataType = CalpontSystemCatalog::VARCHAR; - ct.colWidth = isp->max_length; - ct.precision = 0; - ac->resultType(ct); + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::STD_FUNC || + isp->sum_func() == Item_sum::VARIANCE_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::DOUBLE; + ct.colWidth = 8; + ct.scale = 0; + ac->resultType(ct); + } + else if 
(isp->sum_func() == Item_sum::SUM_BIT_FUNC) + { + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::BIGINT; + ct.colWidth = 8; + ct.scale = 0; + ct.precision = -16; // borrowed to indicate skip null value check on connector + ac->resultType(ct); + } + else if (isp->sum_func() == Item_sum::GROUP_CONCAT_FUNC) + { + //Item_func_group_concat* gc = (Item_func_group_concat*)isp; + CalpontSystemCatalog::ColType ct; + ct.colDataType = CalpontSystemCatalog::VARCHAR; + ct.colWidth = isp->max_length; + ct.precision = 0; + ac->resultType(ct); + } + else + { + // UDAF result type will be set below. + ac->resultType(parm->resultType()); + } } else { - // UDAF result type will be set below. - ac->resultType(parm->resultType()); + ac->resultType(colType_MysqlToIDB(isp)); } - } - else - { - ac->resultType(colType_MysqlToIDB(isp)); - } - // adjust decimal result type according to internalDecimalScale - if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) - { - CalpontSystemCatalog::ColType ct = ac->resultType(); - ct.scale = gwi.internalDecimalScale; - ac->resultType(ct); - } - - // check for same aggregate on the select list - ac->expressionId(ci->expressionId++); - - if (gwi.clauseType != SELECT) - { - for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + // adjust decimal result type according to internalDecimalScale + if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL) { - if (*ac == gwi.returnedCols[i].get()) - ac->expressionId(gwi.returnedCols[i]->expressionId()); + CalpontSystemCatalog::ColType ct = ac->resultType(); + ct.scale = gwi.internalDecimalScale; + ac->resultType(ct); } - } - // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will - // be applied in ExeMgr. When the ExeMgr fix is available, this checking - // will be taken out. 
+ // check for same aggregate on the select list + ac->expressionId(ci->expressionId++); + + if (gwi.clauseType != SELECT) + { + for (uint32_t i = 0; i < gwi.returnedCols.size(); i++) + { + if (*ac == gwi.returnedCols[i].get()) + ac->expressionId(gwi.returnedCols[i]->expressionId()); + } + } + + // @bug5977 @note Temporary fix to avoid mysqld crash. The permanent fix will + // be applied in ExeMgr. When the ExeMgr fix is available, this checking + // will be taken out. if (isp->sum_func() != Item_sum::UDF_SUM_FUNC) { - if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) - { - gwi.fatalParseError = true; - gwi.parseErrorText = "No project column found for aggregate function"; + if (ac->constCol() && gwi.tbList.empty() && gwi.derivedTbList.empty()) + { + gwi.fatalParseError = true; + gwi.parseErrorText = "No project column found for aggregate function"; + if (ac) delete ac; - return NULL; - } - else if (ac->constCol()) - { - gwi.count_asterisk_list.push_back(ac); - } + + return NULL; + } + else if (ac->constCol()) + { + gwi.count_asterisk_list.push_back(ac); + } } - // For UDAF, populate the context and call the UDAF init() function. + // For UDAF, populate the context and call the UDAF init() function. // The return type is (should be) set in context by init(). 
- if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) - { - UDAFColumn* udafc = dynamic_cast(ac); - - if (udafc) + if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) { - mcsv1Context& context = udafc->getContext(); - context.setName(isp->func_name()); + UDAFColumn* udafc = dynamic_cast(ac); - // Set up the return type defaults for the call to init() - context.setResultType(udafc->resultType().colDataType); - context.setColWidth(udafc->resultType().colWidth); - context.setScale(udafc->resultType().scale); - context.setPrecision(udafc->resultType().precision); + if (udafc) + { + mcsv1Context& context = udafc->getContext(); + context.setName(isp->func_name()); + + // Set up the return type defaults for the call to init() + context.setResultType(udafc->resultType().colDataType); + context.setColWidth(udafc->resultType().colWidth); + context.setScale(udafc->resultType().scale); + context.setPrecision(udafc->resultType().precision); context.setParamCount(udafc->aggParms().size()); ColumnDatum colType; ColumnDatum colTypes[udafc->aggParms().size()]; + // Build the column type vector. 
// Modified for MCOL-1201 multi-argument aggregate for (uint32_t i = 0; i < udafc->aggParms().size(); ++i) { - const execplan::CalpontSystemCatalog::ColType& resultType + const execplan::CalpontSystemCatalog::ColType& resultType = udafc->aggParms()[i]->resultType(); colType.dataType = resultType.colDataType; colType.precision = resultType.precision; @@ -4536,65 +4551,78 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) colTypes[i] = colType; } - // Call the user supplied init() + // Call the user supplied init() mcsv1sdk::mcsv1_UDAF* udaf = context.getFunction(); + if (!udaf) { gwi.fatalParseError = true; gwi.parseErrorText = "Aggregate Function " + context.getName() + " doesn't exist in the ColumnStore engine"; + if (ac) delete ac; + return NULL; } - if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR) - { - gwi.fatalParseError = true; - gwi.parseErrorText = udafc->getContext().getErrorMessage(); + + if (udaf->init(&context, colTypes) == mcsv1_UDAF::ERROR) + { + gwi.fatalParseError = true; + gwi.parseErrorText = udafc->getContext().getErrorMessage(); + if (ac) delete ac; - return NULL; - } + + return NULL; + } // UDAF_OVER_REQUIRED means that this function is for Window // Function only. Reject it here in aggregate land. 
- if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) - { - gwi.fatalParseError = true; - gwi.parseErrorText = - logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, - context.getName()); + if (udafc->getContext().getRunFlag(UDAF_OVER_REQUIRED)) + { + gwi.fatalParseError = true; + gwi.parseErrorText = + logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_WINDOW_FUNC_ONLY, + context.getName()); + if (ac) delete ac; - return NULL; - } - // Set the return type as set in init() - CalpontSystemCatalog::ColType ct; - ct.colDataType = context.getResultType(); - ct.colWidth = context.getColWidth(); - ct.scale = context.getScale(); - ct.precision = context.getPrecision(); - udafc->resultType(ct); + return NULL; + } + + // Set the return type as set in init() + CalpontSystemCatalog::ColType ct; + ct.colDataType = context.getResultType(); + ct.colWidth = context.getColWidth(); + ct.scale = context.getScale(); + ct.precision = context.getPrecision(); + udafc->resultType(ct); + } } } - } catch (std::logic_error e) { gwi.fatalParseError = true; gwi.parseErrorText = "error building Aggregate Function: "; gwi.parseErrorText += e.what(); + if (ac) delete ac; + return NULL; } catch (...) 
{ gwi.fatalParseError = true; gwi.parseErrorText = "error building Aggregate Function: Unspecified exception"; + if (ac) delete ac; + return NULL; } + return ac; } @@ -7915,6 +7943,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i setError(gwi.thd, ER_INTERNAL_ERROR, gwi.parseErrorText, gwi); return ER_CHECK_NOT_IMPLEMENTED; } + // Replace the last (presumably constant) object with minSc if ((*coliter)->aggParms().empty()) { diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 3d4ee6ac3..dcafc4c38 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -782,7 +782,7 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h //double double_val = *(double*)(&value); //f2->store(double_val); if ((f2->decimals() == DECIMAL_NOT_SPECIFIED && row.getScale(s) > 0) - || f2->decimals() < row.getScale(s)) + || f2->decimals() < row.getScale(s)) { f2->dec = row.getScale(s); } @@ -5278,6 +5278,7 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE execplan::CalpontSelectExecutionPlan::ColumnMap::iterator colMapIter; execplan::CalpontSelectExecutionPlan::ColumnMap::iterator condColMapIter; execplan::ParseTree* ptIt; + for (TABLE_LIST* tl = gi.groupByTables; tl; tl = tl->next_local) { mapiter = ci->tableMap.find(tl->table); diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 8d68a6260..4c04a402c 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -384,7 +384,7 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n // Modified for MCOL-1201 multi-argument aggregate for (size_t i = 0; i < funcParms.size(); ++i) { - const execplan::CalpontSystemCatalog::ColType& resultType + const execplan::CalpontSystemCatalog::ColType& resultType = funcParms[i]->resultType(); colType.dataType = resultType.colDataType; colType.precision = 
resultType.precision; diff --git a/dbcon/mysql/is_columnstore_columns.cpp b/dbcon/mysql/is_columnstore_columns.cpp index 1aa379724..84d81042c 100644 --- a/dbcon/mysql/is_columnstore_columns.cpp +++ b/dbcon/mysql/is_columnstore_columns.cpp @@ -56,10 +56,11 @@ ST_FIELD_INFO is_columnstore_columns_fields[] = }; -static void get_cond_item(Item_func *item, String **table, String **db) +static void get_cond_item(Item_func* item, String** table, String** db) { char tmp_char[MAX_FIELD_WIDTH]; - Item_field *item_field = (Item_field*) item->arguments()[0]->real_item(); + Item_field* item_field = (Item_field*) item->arguments()[0]->real_item(); + if (strcasecmp(item_field->field_name, "table_name") == 0) { String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); @@ -74,13 +75,14 @@ static void get_cond_item(Item_func *item, String **table, String **db) } } -static void get_cond_items(COND *cond, String **table, String **db) +static void get_cond_items(COND* cond, String** table, String** db) { if (cond->type() == Item::FUNC_ITEM) { Item_func* fitem = (Item_func*) cond; + if (fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && - fitem->arguments()[1]->const_item()) + fitem->arguments()[1]->const_item()) { get_cond_item(fitem, table, db); } @@ -88,8 +90,9 @@ static void get_cond_items(COND *cond, String **table, String **db) else if ((cond->type() == Item::COND_ITEM) && (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)) { List_iterator li(*((Item_cond*) cond)->argument_list()); - Item *item; - while ((item= li++)) + Item* item; + + while ((item = li++)) { if (item->type() == Item::FUNC_ITEM) { @@ -107,8 +110,8 @@ static int is_columnstore_columns_fill(THD* thd, TABLE_LIST* tables, COND* cond) { CHARSET_INFO* cs = system_charset_info; TABLE* table = tables->table; - String *table_name = NULL; - String *db_name = NULL; + String* table_name = NULL; + String* db_name = NULL; boost::shared_ptr systemCatalogPtr = 
execplan::CalpontSystemCatalog::makeCalpontSystemCatalog(execplan::CalpontSystemCatalog::idb_tid2sid(thd->thread_id)); @@ -133,6 +136,7 @@ static int is_columnstore_columns_fill(THD* thd, TABLE_LIST* tables, COND* cond) continue; } } + if (table_name) { if ((*it).second.table.compare(table_name->ptr()) != 0) diff --git a/dbcon/mysql/is_columnstore_extents.cpp b/dbcon/mysql/is_columnstore_extents.cpp index c6dcba567..0fd42d3cf 100644 --- a/dbcon/mysql/is_columnstore_extents.cpp +++ b/dbcon/mysql/is_columnstore_extents.cpp @@ -52,7 +52,7 @@ ST_FIELD_INFO is_columnstore_extents_fields[] = {0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0} }; -static int generate_result(BRM::OID_t oid, BRM::DBRM *emp, TABLE *table, THD *thd) +static int generate_result(BRM::OID_t oid, BRM::DBRM* emp, TABLE* table, THD* thd) { CHARSET_INFO* cs = system_charset_info; std::vector entries; @@ -77,7 +77,7 @@ static int generate_result(BRM::OID_t oid, BRM::DBRM *emp, TABLE *table, THD *th table->field[1]->store("Column", strlen("Column"), cs); if (iter->partition.cprange.lo_val == std::numeric_limits::max() || - iter->partition.cprange.lo_val <= (std::numeric_limits::min() + 2)) + iter->partition.cprange.lo_val <= (std::numeric_limits::min() + 2)) { table->field[4]->set_null(); } @@ -88,7 +88,7 @@ static int generate_result(BRM::OID_t oid, BRM::DBRM *emp, TABLE *table, THD *th } if (iter->partition.cprange.hi_val == std::numeric_limits::max() || - iter->partition.cprange.hi_val <= (std::numeric_limits::min() + 2)) + iter->partition.cprange.hi_val <= (std::numeric_limits::min() + 2)) { table->field[5]->set_null(); } @@ -179,15 +179,17 @@ static int generate_result(BRM::OID_t oid, BRM::DBRM *emp, TABLE *table, THD *th iter++; } + return 0; } -static int is_columnstore_extents_fill(THD *thd, TABLE_LIST *tables, COND *cond) +static int is_columnstore_extents_fill(THD* thd, TABLE_LIST* tables, COND* cond) { BRM::OID_t cond_oid = 0; - TABLE *table = tables->table; + TABLE* table = tables->table; + + BRM::DBRM* 
emp = new BRM::DBRM(); - BRM::DBRM *emp = new BRM::DBRM(); if (!emp || !emp->isDBRMReady()) { return 1; @@ -196,13 +198,15 @@ static int is_columnstore_extents_fill(THD *thd, TABLE_LIST *tables, COND *cond) if (cond && cond->type() == Item::FUNC_ITEM) { Item_func* fitem = (Item_func*) cond; + if ((fitem->functype() == Item_func::EQ_FUNC) && (fitem->argument_count() == 2)) { - if(fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && - fitem->arguments()[1]->const_item()) + if (fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[1]->const_item()) { // WHERE object_id = value - Item_field *item_field = (Item_field*) fitem->arguments()[0]->real_item(); + Item_field* item_field = (Item_field*) fitem->arguments()[0]->real_item(); + if (strcasecmp(item_field->field_name, "object_id") == 0) { cond_oid = fitem->arguments()[1]->val_int(); @@ -210,10 +214,11 @@ static int is_columnstore_extents_fill(THD *thd, TABLE_LIST *tables, COND *cond) } } else if (fitem->arguments()[1]->real_item()->type() == Item::FIELD_ITEM && - fitem->arguments()[0]->const_item()) + fitem->arguments()[0]->const_item()) { // WHERE value = object_id - Item_field *item_field = (Item_field*) fitem->arguments()[1]->real_item(); + Item_field* item_field = (Item_field*) fitem->arguments()[1]->real_item(); + if (strcasecmp(item_field->field_name, "object_id") == 0) { cond_oid = fitem->arguments()[0]->val_int(); @@ -224,15 +229,17 @@ static int is_columnstore_extents_fill(THD *thd, TABLE_LIST *tables, COND *cond) else if (fitem->functype() == Item_func::IN_FUNC) { // WHERE object_id in (value1, value2) - Item_field *item_field = (Item_field*) fitem->arguments()[0]->real_item(); + Item_field* item_field = (Item_field*) fitem->arguments()[0]->real_item(); + if (strcasecmp(item_field->field_name, "object_id") == 0) { - for (unsigned int i=1; i < fitem->argument_count(); i++) + for (unsigned int i = 1; i < fitem->argument_count(); i++) { cond_oid = 
fitem->arguments()[i]->val_int(); int result = generate_result(cond_oid, emp, table, thd); + if (result) - return 1; + return 1; } } } @@ -240,13 +247,16 @@ static int is_columnstore_extents_fill(THD *thd, TABLE_LIST *tables, COND *cond) strcasecmp(fitem->func_name(), "find_in_set") == 0) { // WHERE FIND_IN_SET(object_id, values) - String *tmp_var = fitem->arguments()[1]->val_str(); + String* tmp_var = fitem->arguments()[1]->val_str(); std::stringstream ss(tmp_var->ptr()); + while (ss >> cond_oid) { int ret = generate_result(cond_oid, emp, table, thd); + if (ret) return 1; + if (ss.peek() == ',') ss.ignore(); } @@ -256,12 +266,14 @@ static int is_columnstore_extents_fill(THD *thd, TABLE_LIST *tables, COND *cond) execplan::ObjectIDManager oidm; BRM::OID_t MaxOID = oidm.size(); - for(BRM::OID_t oid = 3000; oid <= MaxOID; oid++) + for (BRM::OID_t oid = 3000; oid <= MaxOID; oid++) { int result = generate_result(oid, emp, table, thd); + if (result) return 1; } + delete emp; return 0; } diff --git a/dbcon/mysql/is_columnstore_files.cpp b/dbcon/mysql/is_columnstore_files.cpp index 815345474..71d61958f 100644 --- a/dbcon/mysql/is_columnstore_files.cpp +++ b/dbcon/mysql/is_columnstore_files.cpp @@ -84,7 +84,7 @@ static bool get_file_sizes(messageqcpp::MessageQueueClient* msgQueueClient, cons } } -static int generate_result(BRM::OID_t oid, BRM::DBRM *emp, TABLE *table, THD *thd) +static int generate_result(BRM::OID_t oid, BRM::DBRM* emp, TABLE* table, THD* thd) { std::vector entries; CHARSET_INFO* cs = system_charset_info; @@ -185,14 +185,15 @@ static int generate_result(BRM::OID_t oid, BRM::DBRM *emp, TABLE *table, THD *th messageqcpp::MessageQueueClientPool::releaseInstance(msgQueueClient); msgQueueClient = NULL; } + return 0; } -static int is_columnstore_files_fill(THD *thd, TABLE_LIST *tables, COND *cond) +static int is_columnstore_files_fill(THD* thd, TABLE_LIST* tables, COND* cond) { - BRM::DBRM *emp = new BRM::DBRM(); + BRM::DBRM* emp = new BRM::DBRM(); BRM::OID_t 
cond_oid = 0; - TABLE *table = tables->table; + TABLE* table = tables->table; if (!emp || !emp->isDBRMReady()) { @@ -202,13 +203,15 @@ static int is_columnstore_files_fill(THD *thd, TABLE_LIST *tables, COND *cond) if (cond && cond->type() == Item::FUNC_ITEM) { Item_func* fitem = (Item_func*) cond; + if ((fitem->functype() == Item_func::EQ_FUNC) && (fitem->argument_count() == 2)) { - if(fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && - fitem->arguments()[1]->const_item()) + if (fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && + fitem->arguments()[1]->const_item()) { // WHERE object_id = value - Item_field *item_field = (Item_field*) fitem->arguments()[0]->real_item(); + Item_field* item_field = (Item_field*) fitem->arguments()[0]->real_item(); + if (strcasecmp(item_field->field_name, "object_id") == 0) { cond_oid = fitem->arguments()[1]->val_int(); @@ -216,10 +219,11 @@ static int is_columnstore_files_fill(THD *thd, TABLE_LIST *tables, COND *cond) } } else if (fitem->arguments()[1]->real_item()->type() == Item::FIELD_ITEM && - fitem->arguments()[0]->const_item()) + fitem->arguments()[0]->const_item()) { // WHERE value = object_id - Item_field *item_field = (Item_field*) fitem->arguments()[1]->real_item(); + Item_field* item_field = (Item_field*) fitem->arguments()[1]->real_item(); + if (strcasecmp(item_field->field_name, "object_id") == 0) { cond_oid = fitem->arguments()[0]->val_int(); @@ -230,15 +234,17 @@ static int is_columnstore_files_fill(THD *thd, TABLE_LIST *tables, COND *cond) else if (fitem->functype() == Item_func::IN_FUNC) { // WHERE object_id in (value1, value2) - Item_field *item_field = (Item_field*) fitem->arguments()[0]->real_item(); + Item_field* item_field = (Item_field*) fitem->arguments()[0]->real_item(); + if (strcasecmp(item_field->field_name, "object_id") == 0) { - for (unsigned int i=1; i < fitem->argument_count(); i++) + for (unsigned int i = 1; i < fitem->argument_count(); i++) { cond_oid = 
fitem->arguments()[i]->val_int(); int result = generate_result(cond_oid, emp, table, thd); + if (result) - return 1; + return 1; } } } @@ -246,13 +252,16 @@ static int is_columnstore_files_fill(THD *thd, TABLE_LIST *tables, COND *cond) strcasecmp(fitem->func_name(), "find_in_set") == 0) { // WHERE FIND_IN_SET(object_id, values) - String *tmp_var = fitem->arguments()[1]->val_str(); + String* tmp_var = fitem->arguments()[1]->val_str(); std::stringstream ss(tmp_var->ptr()); + while (ss >> cond_oid) { int ret = generate_result(cond_oid, emp, table, thd); + if (ret) return 1; + if (ss.peek() == ',') ss.ignore(); } @@ -264,9 +273,10 @@ static int is_columnstore_files_fill(THD *thd, TABLE_LIST *tables, COND *cond) if (!cond_oid) { - for(BRM::OID_t oid = 3000; oid <= MaxOID; oid++) + for (BRM::OID_t oid = 3000; oid <= MaxOID; oid++) { int result = generate_result(oid, emp, table, thd); + if (result) return 1; } diff --git a/dbcon/mysql/is_columnstore_tables.cpp b/dbcon/mysql/is_columnstore_tables.cpp index ddc21e6f5..d422c2f90 100644 --- a/dbcon/mysql/is_columnstore_tables.cpp +++ b/dbcon/mysql/is_columnstore_tables.cpp @@ -42,10 +42,11 @@ ST_FIELD_INFO is_columnstore_tables_fields[] = {0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0} }; -static void get_cond_item(Item_func *item, String **table, String **db) +static void get_cond_item(Item_func* item, String** table, String** db) { char tmp_char[MAX_FIELD_WIDTH]; - Item_field *item_field = (Item_field*) item->arguments()[0]->real_item(); + Item_field* item_field = (Item_field*) item->arguments()[0]->real_item(); + if (strcasecmp(item_field->field_name, "table_name") == 0) { String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); @@ -60,13 +61,14 @@ static void get_cond_item(Item_func *item, String **table, String **db) } } -static void get_cond_items(COND *cond, String **table, String **db) +static void get_cond_items(COND* cond, String** table, String** db) { if (cond->type() == Item::FUNC_ITEM) { Item_func* fitem = 
(Item_func*) cond; + if (fitem->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && - fitem->arguments()[1]->const_item()) + fitem->arguments()[1]->const_item()) { get_cond_item(fitem, table, db); } @@ -74,8 +76,9 @@ static void get_cond_items(COND *cond, String **table, String **db) else if ((cond->type() == Item::COND_ITEM) && (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)) { List_iterator li(*((Item_cond*) cond)->argument_list()); - Item *item; - while ((item= li++)) + Item* item; + + while ((item = li++)) { if (item->type() == Item::FUNC_ITEM) { @@ -93,8 +96,8 @@ static int is_columnstore_tables_fill(THD* thd, TABLE_LIST* tables, COND* cond) { CHARSET_INFO* cs = system_charset_info; TABLE* table = tables->table; - String *table_name = NULL; - String *db_name = NULL; + String* table_name = NULL; + String* db_name = NULL; boost::shared_ptr systemCatalogPtr = execplan::CalpontSystemCatalog::makeCalpontSystemCatalog(execplan::CalpontSystemCatalog::idb_tid2sid(thd->thread_id)); @@ -119,6 +122,7 @@ static int is_columnstore_tables_fill(THD* thd, TABLE_LIST* tables, COND* cond) continue; } } + if (table_name) { if ((*it).second.table.compare(table_name->ptr()) != 0) diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index 2747bf1fa..5c63ffd20 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -5811,34 +5811,35 @@ bool Oam::autoMovePmDbroot(std::string residePM) exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); } - //detach first to make sure DBS can be detach before trying to move to another pm - DBRootConfigList::iterator pt3 = residedbrootConfigList.begin(); - for( ; pt3 != residedbrootConfigList.end() ; pt3++ ) - { - int dbrootID = *pt3; + //detach first to make sure DBS can be detach before trying to move to another pm + DBRootConfigList::iterator pt3 = residedbrootConfigList.begin(); - try - { - typedef std::vector dbrootList; - dbrootList dbrootlist; - dbrootlist.push_back(itoa(dbrootID)); + for 
( ; pt3 != residedbrootConfigList.end() ; pt3++ ) + { + int dbrootID = *pt3; - amazonDetach(dbrootlist); - } - catch (exception& ) - { - writeLog("ERROR: amazonDetach failure", LOG_TYPE_ERROR ); - - //reattach - typedef std::vector dbrootList; - dbrootList dbrootlist; - dbrootlist.push_back(itoa(dbrootID)); + try + { + typedef std::vector dbrootList; + dbrootList dbrootlist; + dbrootlist.push_back(itoa(dbrootID)); - amazonAttach(residePM, dbrootlist); + amazonDetach(dbrootlist); + } + catch (exception& ) + { + writeLog("ERROR: amazonDetach failure", LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_DETACH_FAILURE); - } - } + //reattach + typedef std::vector dbrootList; + dbrootList dbrootlist; + dbrootlist.push_back(itoa(dbrootID)); + + amazonAttach(residePM, dbrootlist); + + exceptionControl("autoMovePmDbroot", API_DETACH_FAILURE); + } + } //get dbroot id for other PMs systemStorageInfo_t t; @@ -6359,15 +6360,17 @@ bool Oam::autoUnMovePmDbroot(std::string toPM) } if (!found) - writeLog("No dbroots found in ../Calpont/local/moveDbrootTransactionLog", LOG_TYPE_DEBUG ); - cout << "No dbroots found in " << fileName << endl; + { + writeLog("No dbroots found in ../Calpont/local/moveDbrootTransactionLog", LOG_TYPE_DEBUG ); + + cout << "No dbroots found in " << fileName << endl; } oldFile.close(); unlink (fileName.c_str()); ofstream newFile (fileName.c_str()); - //create new file +//create new file int fd = open(fileName.c_str(), O_RDWR | O_CREAT, 0664); copy(lines.begin(), lines.end(), ostream_iterator(newFile, "\n")); @@ -7773,7 +7776,7 @@ void Oam::actionMysqlCalpont(MYSQLCALPONT_ACTION action) else return; - // check if mysql-Columnstore is installed + // check if mysql-Columnstore is installed string mysqlscript = InstallDir + "/mysql/mysql-Columnstore"; if (access(mysqlscript.c_str(), X_OK) != 0) @@ -10377,146 +10380,161 @@ void Oam::sendStatusUpdate(ByteStream obs, ByteStream::byte returnRequestType) * 
****************************************************************************/ - void Oam::amazonDetach(dbrootList dbrootConfigList) +void Oam::amazonDetach(dbrootList dbrootConfigList) +{ + //if amazon cloud with external volumes, do the detach/attach moves + string cloud; + string DBRootStorageType; + + try { - //if amazon cloud with external volumes, do the detach/attach moves - string cloud; - string DBRootStorageType; - try { - getSystemConfig("Cloud", cloud); - getSystemConfig("DBRootStorageType", DBRootStorageType); - } - catch(...) {} + getSystemConfig("Cloud", cloud); + getSystemConfig("DBRootStorageType", DBRootStorageType); + } + catch (...) {} - if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && - DBRootStorageType == "external" ) - { - writeLog("amazonDetach function started ", LOG_TYPE_DEBUG ); - - dbrootList::iterator pt3 = dbrootConfigList.begin(); - for( ; pt3 != dbrootConfigList.end() ; pt3++) - { - string dbrootid = *pt3; - string volumeNameID = "PMVolumeName" + dbrootid; - string volumeName = oam::UnassignedName; - string deviceNameID = "PMVolumeDeviceName" + dbrootid; - string deviceName = oam::UnassignedName; - try { - getSystemConfig( volumeNameID, volumeName); - getSystemConfig( deviceNameID, deviceName); - } - catch(...) 
- {} - - if ( volumeName == oam::UnassignedName || deviceName == oam::UnassignedName ) - { - cout << " ERROR: amazonDetach, invalid configure " + volumeName + ":" + deviceName << endl; - writeLog("ERROR: amazonDetach, invalid configure " + volumeName + ":" + deviceName, LOG_TYPE_ERROR ); - exceptionControl("amazonDetach", API_INVALID_PARAMETER); - } - - //send msg to to-pm to umount volume - int returnStatus = sendMsgToProcMgr(UNMOUNT, dbrootid, FORCEFUL, ACK_YES); - if (returnStatus != API_SUCCESS) { - writeLog("ERROR: amazonDetach, umount failed on " + dbrootid, LOG_TYPE_ERROR ); - } - - if (!detachEC2Volume(volumeName)) { - cout << " ERROR: amazonDetach, detachEC2Volume failed on " + volumeName << endl; - writeLog("ERROR: amazonDetach, detachEC2Volume failed on " + volumeName , LOG_TYPE_ERROR ); - exceptionControl("amazonDetach", API_FAILURE); - } - - writeLog("amazonDetach, detachEC2Volume passed on " + volumeName , LOG_TYPE_DEBUG ); - } - } - } - - /*************************************************************************** - * - * Function: amazonAttach - * - * Purpose: Amazon EC2 volume Attach needed - * - ****************************************************************************/ - - void Oam::amazonAttach(std::string toPM, dbrootList dbrootConfigList) + if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && + DBRootStorageType == "external" ) { - //if amazon cloud with external volumes, do the detach/attach moves - string cloud; - string DBRootStorageType; - try { - getSystemConfig("Cloud", cloud); - getSystemConfig("DBRootStorageType", DBRootStorageType); - } - catch(...) 
{} + writeLog("amazonDetach function started ", LOG_TYPE_DEBUG ); - if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && - DBRootStorageType == "external" ) - { - writeLog("amazonAttach function started ", LOG_TYPE_DEBUG ); + dbrootList::iterator pt3 = dbrootConfigList.begin(); - //get Instance Name for to-pm - string toInstanceName = oam::UnassignedName; - try - { - ModuleConfig moduleconfig; - getSystemConfig(toPM, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - toInstanceName = (*pt1).HostName; - } - catch(...) - {} + for ( ; pt3 != dbrootConfigList.end() ; pt3++) + { + string dbrootid = *pt3; + string volumeNameID = "PMVolumeName" + dbrootid; + string volumeName = oam::UnassignedName; + string deviceNameID = "PMVolumeDeviceName" + dbrootid; + string deviceName = oam::UnassignedName; - if ( toInstanceName == oam::UnassignedName || toInstanceName.empty() ) - { - cout << " ERROR: amazonAttach, invalid Instance Name for " << toPM << endl; - writeLog("ERROR: amazonAttach, invalid Instance Name " + toPM, LOG_TYPE_ERROR ); - exceptionControl("amazonAttach", API_INVALID_PARAMETER); - } + try + { + getSystemConfig( volumeNameID, volumeName); + getSystemConfig( deviceNameID, deviceName); + } + catch (...) + {} - dbrootList::iterator pt3 = dbrootConfigList.begin(); - for( ; pt3 != dbrootConfigList.end() ; pt3++) - { - string dbrootid = *pt3; - string volumeNameID = "PMVolumeName" + dbrootid; - string volumeName = oam::UnassignedName; - string deviceNameID = "PMVolumeDeviceName" + dbrootid; - string deviceName = oam::UnassignedName; - try { - getSystemConfig( volumeNameID, volumeName); - getSystemConfig( deviceNameID, deviceName); - } - catch(...) 
- {} + if ( volumeName == oam::UnassignedName || deviceName == oam::UnassignedName ) + { + cout << " ERROR: amazonDetach, invalid configure " + volumeName + ":" + deviceName << endl; + writeLog("ERROR: amazonDetach, invalid configure " + volumeName + ":" + deviceName, LOG_TYPE_ERROR ); + exceptionControl("amazonDetach", API_INVALID_PARAMETER); + } - if ( volumeName == oam::UnassignedName || deviceName == oam::UnassignedName ) - { - cout << " ERROR: amazonAttach, invalid configure " + volumeName + ":" + deviceName << endl; - writeLog("ERROR: amazonAttach, invalid configure " + volumeName + ":" + deviceName, LOG_TYPE_ERROR ); - exceptionControl("amazonAttach", API_INVALID_PARAMETER); - } + //send msg to to-pm to umount volume + int returnStatus = sendMsgToProcMgr(UNMOUNT, dbrootid, FORCEFUL, ACK_YES); - if (!attachEC2Volume(volumeName, deviceName, toInstanceName)) { - cout << " ERROR: amazonAttach, attachEC2Volume failed on " + volumeName + ":" + deviceName + ":" + toInstanceName << endl; - writeLog("ERROR: amazonAttach, attachEC2Volume failed on " + volumeName + ":" + deviceName + ":" + toInstanceName, LOG_TYPE_ERROR ); - exceptionControl("amazonAttach", API_FAILURE); - } + if (returnStatus != API_SUCCESS) + { + writeLog("ERROR: amazonDetach, umount failed on " + dbrootid, LOG_TYPE_ERROR ); + } - writeLog("amazonAttach, attachEC2Volume passed on " + volumeName + ":" + toPM, LOG_TYPE_DEBUG ); - } - } - } + if (!detachEC2Volume(volumeName)) + { + cout << " ERROR: amazonDetach, detachEC2Volume failed on " + volumeName << endl; + writeLog("ERROR: amazonDetach, detachEC2Volume failed on " + volumeName, LOG_TYPE_ERROR ); + exceptionControl("amazonDetach", API_FAILURE); + } + writeLog("amazonDetach, detachEC2Volume passed on " + volumeName, LOG_TYPE_DEBUG ); + } + } +} - /*************************************************************************** - * - * Function: amazonReattach +/*************************************************************************** * - * Purpose: 
Amazon EC2 volume reattach needed + * Function: amazonAttach + * + * Purpose: Amazon EC2 volume Attach needed * ****************************************************************************/ +void Oam::amazonAttach(std::string toPM, dbrootList dbrootConfigList) +{ + //if amazon cloud with external volumes, do the detach/attach moves + string cloud; + string DBRootStorageType; + + try + { + getSystemConfig("Cloud", cloud); + getSystemConfig("DBRootStorageType", DBRootStorageType); + } + catch (...) {} + + if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && + DBRootStorageType == "external" ) + { + writeLog("amazonAttach function started ", LOG_TYPE_DEBUG ); + + //get Instance Name for to-pm + string toInstanceName = oam::UnassignedName; + + try + { + ModuleConfig moduleconfig; + getSystemConfig(toPM, moduleconfig); + HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); + toInstanceName = (*pt1).HostName; + } + catch (...) + {} + + if ( toInstanceName == oam::UnassignedName || toInstanceName.empty() ) + { + cout << " ERROR: amazonAttach, invalid Instance Name for " << toPM << endl; + writeLog("ERROR: amazonAttach, invalid Instance Name " + toPM, LOG_TYPE_ERROR ); + exceptionControl("amazonAttach", API_INVALID_PARAMETER); + } + + dbrootList::iterator pt3 = dbrootConfigList.begin(); + + for ( ; pt3 != dbrootConfigList.end() ; pt3++) + { + string dbrootid = *pt3; + string volumeNameID = "PMVolumeName" + dbrootid; + string volumeName = oam::UnassignedName; + string deviceNameID = "PMVolumeDeviceName" + dbrootid; + string deviceName = oam::UnassignedName; + + try + { + getSystemConfig( volumeNameID, volumeName); + getSystemConfig( deviceNameID, deviceName); + } + catch (...) 
+ {} + + if ( volumeName == oam::UnassignedName || deviceName == oam::UnassignedName ) + { + cout << " ERROR: amazonAttach, invalid configure " + volumeName + ":" + deviceName << endl; + writeLog("ERROR: amazonAttach, invalid configure " + volumeName + ":" + deviceName, LOG_TYPE_ERROR ); + exceptionControl("amazonAttach", API_INVALID_PARAMETER); + } + + if (!attachEC2Volume(volumeName, deviceName, toInstanceName)) + { + cout << " ERROR: amazonAttach, attachEC2Volume failed on " + volumeName + ":" + deviceName + ":" + toInstanceName << endl; + writeLog("ERROR: amazonAttach, attachEC2Volume failed on " + volumeName + ":" + deviceName + ":" + toInstanceName, LOG_TYPE_ERROR ); + exceptionControl("amazonAttach", API_FAILURE); + } + + writeLog("amazonAttach, attachEC2Volume passed on " + volumeName + ":" + toPM, LOG_TYPE_DEBUG ); + } + } +} + + +/*************************************************************************** +* +* Function: amazonReattach +* +* Purpose: Amazon EC2 volume reattach needed +* +****************************************************************************/ + void Oam::amazonReattach(std::string toPM, dbrootList dbrootConfigList, bool attach) { //if amazon cloud with external volumes, do the detach/attach moves diff --git a/oam/oamcpp/liboamcpp.h b/oam/oamcpp/liboamcpp.h index 3b18ac490..8575cd4d6 100644 --- a/oam/oamcpp/liboamcpp.h +++ b/oam/oamcpp/liboamcpp.h @@ -229,7 +229,7 @@ enum API_STATUS API_CONN_REFUSED, API_CANCELLED, API_STILL_WORKING, - API_DETACH_FAILURE, + API_DETACH_FAILURE, API_MAX }; @@ -2433,8 +2433,8 @@ public: void amazonReattach(std::string toPM, dbrootList dbrootConfigList, bool attach = false); void mountDBRoot(dbrootList dbrootConfigList, bool mount = true); - void amazonDetach(dbrootList dbrootConfigList); - void amazonAttach(std::string toPM, dbrootList dbrootConfigList); + void amazonDetach(dbrootList dbrootConfigList); + void amazonAttach(std::string toPM, dbrootList dbrootConfigList); /** *@brief gluster control diff 
--git a/primitives/primproc/primitiveserver.cpp b/primitives/primproc/primitiveserver.cpp index f977149c1..227b494de 100644 --- a/primitives/primproc/primitiveserver.cpp +++ b/primitives/primproc/primitiveserver.cpp @@ -2067,11 +2067,11 @@ struct ReadThread case DICT_CREATE_EQUALITY_FILTER: { PriorityThreadPool::Job job; - const uint8_t *buf = bs->buf(); - uint32_t pos = sizeof(ISMPacketHeader) - 2; - job.stepID = *((uint32_t *) &buf[pos+6]); - job.uniqueID = *((uint32_t *) &buf[pos+10]); - job.sock = outIos; + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; job.functor = boost::shared_ptr(new CreateEqualityFilter(bs)); OOBPool->addJob(job); break; @@ -2080,11 +2080,11 @@ struct ReadThread case DICT_DESTROY_EQUALITY_FILTER: { PriorityThreadPool::Job job; - const uint8_t *buf = bs->buf(); - uint32_t pos = sizeof(ISMPacketHeader) - 2; - job.stepID = *((uint32_t *) &buf[pos+6]); - job.uniqueID = *((uint32_t *) &buf[pos+10]); - job.sock = outIos; + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; job.functor = boost::shared_ptr(new DestroyEqualityFilter(bs)); OOBPool->addJob(job); break; @@ -2118,11 +2118,14 @@ struct ReadThread job.id = hdr->Hdr.UniqueID; job.weight = LOGICAL_BLOCK_RIDS; job.priority = hdr->Hdr.Priority; - const uint8_t *buf = bs->buf(); - uint32_t pos = sizeof(ISMPacketHeader) - 2; - job.stepID = *((uint32_t *) &buf[pos+6]); - job.uniqueID = *((uint32_t *) &buf[pos+10]); - job.sock = outIos; + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; + + if (hdr->flags & IS_SYSCAT) + { //boost::thread t(DictScanJob(outIos, bs, 
writeLock)); // using already-existing threads may cut latency // if it's changed back to running in an independent thread @@ -2167,11 +2170,14 @@ struct ReadThread job.id = bpps->getID(); job.weight = ismHdr->Size; job.priority = bpps->priority(); - const uint8_t *buf = bs->buf(); - uint32_t pos = sizeof(ISMPacketHeader) - 2; - job.stepID = *((uint32_t *) &buf[pos+6]); - job.uniqueID = *((uint32_t *) &buf[pos+10]); - job.sock = outIos; + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; + + if (bpps->isSysCat()) + { //boost::thread t(*bpps); // using already-existing threads may cut latency @@ -2191,11 +2197,11 @@ struct ReadThread { PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::Create(fBPPHandler, bs)); - const uint8_t *buf = bs->buf(); - uint32_t pos = sizeof(ISMPacketHeader) - 2; - job.stepID = *((uint32_t *) &buf[pos+6]); - job.uniqueID = *((uint32_t *) &buf[pos+10]); - job.sock = outIos; + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; OOBPool->addJob(job); //fBPPHandler->createBPP(*bs); break; @@ -2206,11 +2212,11 @@ struct ReadThread PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::AddJoiner(fBPPHandler, bs)); job.id = fBPPHandler->getUniqueID(bs, ismHdr->Command); - const uint8_t *buf = bs->buf(); - uint32_t pos = sizeof(ISMPacketHeader) - 2; - job.stepID = *((uint32_t *) &buf[pos+6]); - job.uniqueID = *((uint32_t *) &buf[pos+10]); - job.sock = outIos; + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; OOBPool->addJob(job); //fBPPHandler->addJoinerToBPP(*bs); break; @@ 
-2224,11 +2230,11 @@ struct ReadThread PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::LastJoiner(fBPPHandler, bs)); job.id = fBPPHandler->getUniqueID(bs, ismHdr->Command); - const uint8_t *buf = bs->buf(); - uint32_t pos = sizeof(ISMPacketHeader) - 2; - job.stepID = *((uint32_t *) &buf[pos+6]); - job.uniqueID = *((uint32_t *) &buf[pos+10]); - job.sock = outIos; + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; OOBPool->addJob(job); break; } @@ -2240,11 +2246,11 @@ struct ReadThread PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::Destroy(fBPPHandler, bs)); job.id = fBPPHandler->getUniqueID(bs, ismHdr->Command); - const uint8_t *buf = bs->buf(); - uint32_t pos = sizeof(ISMPacketHeader) - 2; - job.stepID = *((uint32_t *) &buf[pos+6]); - job.uniqueID = *((uint32_t *) &buf[pos+10]); - job.sock = outIos; + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; OOBPool->addJob(job); //fBPPHandler->destroyBPP(*bs); break; @@ -2263,11 +2269,11 @@ struct ReadThread PriorityThreadPool::Job job; job.functor = boost::shared_ptr(new BPPHandler::Abort(fBPPHandler, bs)); job.id = fBPPHandler->getUniqueID(bs, ismHdr->Command); - const uint8_t *buf = bs->buf(); - uint32_t pos = sizeof(ISMPacketHeader) - 2; - job.stepID = *((uint32_t *) &buf[pos+6]); - job.uniqueID = *((uint32_t *) &buf[pos+10]); - job.sock = outIos; + const uint8_t* buf = bs->buf(); + uint32_t pos = sizeof(ISMPacketHeader) - 2; + job.stepID = *((uint32_t*) &buf[pos + 6]); + job.uniqueID = *((uint32_t*) &buf[pos + 10]); + job.sock = outIos; OOBPool->addJob(job); break; } @@ -2299,12 +2305,12 @@ struct ReadThread } } - // If this function is called, we have a "bug" of some sort. 
We added - // the "fIos" connection to UmSocketSelector earlier, so at the very - // least, UmSocketSelector should have been able to return that con- - // nection/port. We will try to recover by using the original fIos to - // send the response msg; but as stated, if this ever happens we have - // a bug we need to resolve. +// If this function is called, we have a "bug" of some sort. We added +// the "fIos" connection to UmSocketSelector earlier, so at the very +// least, UmSocketSelector should have been able to return that con- +// nection/port. We will try to recover by using the original fIos to +// send the response msg; but as stated, if this ever happens we have +// a bug we need to resolve. void handleUmSockSelErr(const string& cmd) { ostringstream oss; diff --git a/procmgr/main.cpp b/procmgr/main.cpp index ae662adcb..05160da1e 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1688,7 +1688,7 @@ void pingDeviceThread() processManager.restartProcessType("WriteEngineServer", moduleName); //set module to enable state - processManager.enableModule(moduleName, oam::AUTO_OFFLINE, true); + processManager.enableModule(moduleName, oam::AUTO_OFFLINE, true); downActiveOAMModule = false; int retry; @@ -1784,7 +1784,7 @@ void pingDeviceThread() } else //set module to enable state - processManager.enableModule(moduleName, oam::AUTO_OFFLINE, true); + processManager.enableModule(moduleName, oam::AUTO_OFFLINE, true); //restart module processes int retry = 0; @@ -2094,7 +2094,7 @@ void pingDeviceThread() if ( PrimaryUMModuleName == moduleName ) downPrimaryUM = true; - // if disabled, skip + // if disabled, skip if (opState != oam::AUTO_DISABLED ) { //Log failure, issue alarm, set moduleOpState @@ -2140,7 +2140,9 @@ void pingDeviceThread() if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) || ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) || ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) - string 
error; + { + string error; + try { log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG); @@ -2157,23 +2159,23 @@ void pingDeviceThread() catch (...) { log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR); - } - - if ( error == oam.itoa(oam::API_DETACH_FAILURE) ) - { - processManager.setModuleState(moduleName, oam::AUTO_DISABLED); + } - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); + if ( error == oam.itoa(oam::API_DETACH_FAILURE) ) + { + processManager.setModuleState(moduleName, oam::AUTO_DISABLED); - //enable query stats - dbrm.setSystemQueryReady(true); + // resume the dbrm + oam.dbrmctl("resume"); + log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - //set query system state ready - processManager.setQuerySystemState(true); - - break; + //enable query stats + dbrm.setSystemQueryReady(true); + + //set query system state ready + processManager.setQuerySystemState(true); + + break; } } } diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 7646a0c3d..f9f5a8d47 100644 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -3780,7 +3780,7 @@ void ProcessManager::recycleProcess(string module, bool enableModule) restartProcessType("ExeMgr"); sleep(1); - restartProcessType("mysqld"); + restartProcessType("mysqld"); restartProcessType("WriteEngineServer"); sleep(1); @@ -3839,8 +3839,8 @@ int ProcessManager::enableModule(string target, int state, bool failover) setStandbyModule(newStandbyModule); //set recycle process - if (!failover) - recycleProcess(target); + if (!failover) + recycleProcess(target); log.writeLog(__LINE__, "enableModule request for " + target + " completed", LOG_TYPE_DEBUG); @@ -4648,7 +4648,7 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski PMwithUM = "n"; } - // If mysqld is the processName, then send to modules were ExeMgr is running + // 
If mysqld is the processName, then send to modules were ExeMgr is running try { oam.getProcessStatus(systemprocessstatus); @@ -4659,7 +4659,7 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski if ( systemprocessstatus.processstatus[i].Module == skipModule ) continue; - if ( processName == "mysqld" ) { + if ( processName == "mysqld" ) { if ( systemprocessstatus.processstatus[i].ProcessName == "ExeMgr") { @@ -9814,7 +9814,7 @@ int ProcessManager::OAMParentModuleChange() { log.writeLog(__LINE__, "System Active, restart needed processes", LOG_TYPE_DEBUG); - processManager.restartProcessType("mysqld"); + processManager.restartProcessType("mysqld"); processManager.restartProcessType("ExeMgr"); processManager.restartProcessType("WriteEngineServer"); processManager.reinitProcessType("DBRMWorkerNode"); @@ -11014,7 +11014,7 @@ void ProcessManager::stopProcessTypes(bool manualFlag) log.writeLog(__LINE__, "stopProcessTypes Called"); //front-end first - processManager.stopProcessType("mysqld", manualFlag); + processManager.stopProcessType("mysqld", manualFlag); processManager.stopProcessType("DMLProc", manualFlag); processManager.stopProcessType("DDLProc", manualFlag); processManager.stopProcessType("ExeMgr", manualFlag); diff --git a/procmon/main.cpp b/procmon/main.cpp index 41977139c..35a2ebb59 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -781,10 +781,10 @@ int main(int argc, char** argv) if ( ret != 0 ) log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR); - //mysqld status monitor thread - if ( config.moduleType() == "um" || - ( config.moduleType() == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( config.moduleType() == "pm" && PMwithUM == "y") ) + //mysqld status monitor thread + if ( config.moduleType() == "um" || + ( config.moduleType() == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) || + ( config.moduleType() == "pm" && PMwithUM 
== "y") ) { pthread_t mysqlThread; ret = pthread_create (&mysqlThread, NULL, (void* (*)(void*)) &mysqlMonitorThread, NULL); @@ -1233,7 +1233,7 @@ static void mysqlMonitorThread(MonitorConfig config) catch (...) {} - sleep(5); + sleep(5); } } diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 3da88c2df..5ff74b034 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -484,7 +484,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO log.writeLog(__LINE__, "MSG RECEIVED: Stop process request on " + processName); int requestStatus = API_SUCCESS; - // check for mysqld + // check for mysqld if ( processName == "mysqld" ) { try @@ -553,7 +553,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO msg >> manualFlag; log.writeLog(__LINE__, "MSG RECEIVED: Start process request on: " + processName); - // check for mysqld + // check for mysqld if ( processName == "mysqld" ) { try @@ -684,7 +684,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO log.writeLog(__LINE__, "MSG RECEIVED: Restart process request on " + processName); int requestStatus = API_SUCCESS; - // check for mysqld restart + // check for mysqld restart if ( processName == "mysqld" ) { try @@ -933,7 +933,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO log.writeLog(__LINE__, "Error running DBRM clearShm", LOG_TYPE_ERROR); } - //stop the mysqld daemon + //stop the mysqld daemon try { oam.actionMysqlCalpont(MYSQL_STOP); @@ -1071,13 +1071,14 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO system(cmd.c_str()); - //start the mysqld daemon + //start the mysqld daemon try { oam.actionMysqlCalpont(MYSQL_START); } catch (...) 
- { // mysqld didn't start, return with error + { + // mysqld didn't start, return with error // mysql didn't start, return with error log.writeLog(__LINE__, "STARTALL: MySQL failed to start, start-module failure", LOG_TYPE_CRITICAL); @@ -1366,7 +1367,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO //send down notification oam.sendDeviceNotification(config.moduleName(), MODULE_DOWN); - //stop the mysqld daemon and then columnstore + //stop the mysqld daemon and then columnstore try { oam.actionMysqlCalpont(MYSQL_STOP); } @@ -1548,7 +1549,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO } } - // install mysqld rpms if being reconfigured as a um + // install mysqld rpms if being reconfigured as a um if ( reconfigureModuleName.find("um") != string::npos ) { string cmd = startup::StartUp::installDir() + "/bin/post-mysqld-install >> /tmp/rpminstall"; diff --git a/utils/dataconvert/dataconvert.cpp b/utils/dataconvert/dataconvert.cpp index 1a2e90a88..c4dbbc728 100644 --- a/utils/dataconvert/dataconvert.cpp +++ b/utils/dataconvert/dataconvert.cpp @@ -1420,6 +1420,7 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType, { pushWarning = true; } + value = (int64_t) * (reinterpret_cast(&aTime)); } break; @@ -1928,6 +1929,7 @@ int64_t DataConvert::convertColumnTime( { return value; } + if (dataOrgLen < 3) { // Not enough chars to be a time diff --git a/utils/funcexp/func_date.cpp b/utils/funcexp/func_date.cpp index 9f875b374..d0bc30942 100644 --- a/utils/funcexp/func_date.cpp +++ b/utils/funcexp/func_date.cpp @@ -75,7 +75,7 @@ int64_t Func_date::getIntVal(rowgroup::Row& row, break; } - // Time adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: { int64_t val; diff --git a/utils/funcexp/func_day.cpp b/utils/funcexp/func_day.cpp index a25a41bcf..7ff2bab9a 100644 --- a/utils/funcexp/func_day.cpp +++ 
b/utils/funcexp/func_day.cpp @@ -62,7 +62,7 @@ int64_t Func_day::getIntVal(rowgroup::Row& row, val = parm[0]->data()->getIntVal(row, isNull); return (uint32_t)((val >> 38) & 0x3f); - // Time adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: aDateTime = static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); diff --git a/utils/funcexp/func_dayname.cpp b/utils/funcexp/func_dayname.cpp index abb2d47dc..5da6d8943 100644 --- a/utils/funcexp/func_dayname.cpp +++ b/utils/funcexp/func_dayname.cpp @@ -73,7 +73,7 @@ int64_t Func_dayname::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; - // Time adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: aDateTime = static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); @@ -160,8 +160,10 @@ string Func_dayname::getStrVal(rowgroup::Row& row, CalpontSystemCatalog::ColType& op_ct) { int32_t weekday = getIntVal(row, parm, isNull, op_ct); + if (weekday == -1) return ""; + return helpers::weekdayFullNames[weekday]; } diff --git a/utils/funcexp/func_dayofweek.cpp b/utils/funcexp/func_dayofweek.cpp index 1ac549060..ec84f5738 100644 --- a/utils/funcexp/func_dayofweek.cpp +++ b/utils/funcexp/func_dayofweek.cpp @@ -71,7 +71,7 @@ int64_t Func_dayofweek::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; - // Time adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: aDateTime = static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); diff --git a/utils/funcexp/func_dayofyear.cpp b/utils/funcexp/func_dayofyear.cpp index 0ec48f22f..ee3b9cf30 100644 --- a/utils/funcexp/func_dayofyear.cpp +++ b/utils/funcexp/func_dayofyear.cpp @@ -71,7 +71,7 @@ int64_t Func_dayofyear::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; - // Time 
adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: aDateTime = static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); diff --git a/utils/funcexp/func_last_day.cpp b/utils/funcexp/func_last_day.cpp index 757e19d77..28b4c01e2 100644 --- a/utils/funcexp/func_last_day.cpp +++ b/utils/funcexp/func_last_day.cpp @@ -72,7 +72,7 @@ int64_t Func_last_day::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; - // Time adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: aDateTime = static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); diff --git a/utils/funcexp/func_month.cpp b/utils/funcexp/func_month.cpp index 4269ae22f..5479270d0 100644 --- a/utils/funcexp/func_month.cpp +++ b/utils/funcexp/func_month.cpp @@ -61,7 +61,7 @@ int64_t Func_month::getIntVal(rowgroup::Row& row, val = parm[0]->data()->getIntVal(row, isNull); return (unsigned)((val >> 44) & 0xf); - // Time adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: aDateTime = static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); diff --git a/utils/funcexp/func_monthname.cpp b/utils/funcexp/func_monthname.cpp index dfa530cdd..9657b1ea2 100644 --- a/utils/funcexp/func_monthname.cpp +++ b/utils/funcexp/func_monthname.cpp @@ -48,8 +48,10 @@ string Func_monthname::getStrVal(rowgroup::Row& row, CalpontSystemCatalog::ColType& op_ct) { int32_t month = getIntVal(row, parm, isNull, op_ct); + if (month == -1) return ""; + return helpers::monthFullNames[month]; } @@ -90,7 +92,7 @@ int64_t Func_monthname::getIntVal(rowgroup::Row& row, val = parm[0]->data()->getIntVal(row, isNull); return (unsigned)((val >> 44) & 0xf); - // Time adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: aDateTime = 
static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); diff --git a/utils/funcexp/func_quarter.cpp b/utils/funcexp/func_quarter.cpp index c819cbdbe..78559d68d 100644 --- a/utils/funcexp/func_quarter.cpp +++ b/utils/funcexp/func_quarter.cpp @@ -65,7 +65,7 @@ int64_t Func_quarter::getIntVal(rowgroup::Row& row, month = (val >> 44) & 0xf; break; - // Time adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: aDateTime = static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); diff --git a/utils/funcexp/func_to_days.cpp b/utils/funcexp/func_to_days.cpp index 5c58b1b11..cc2e3afa2 100644 --- a/utils/funcexp/func_to_days.cpp +++ b/utils/funcexp/func_to_days.cpp @@ -85,7 +85,7 @@ int64_t Func_to_days::getIntVal(rowgroup::Row& row, break; } - // Time adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: { int64_t val; diff --git a/utils/funcexp/func_week.cpp b/utils/funcexp/func_week.cpp index 9cb869c1e..a9e47bd4b 100644 --- a/utils/funcexp/func_week.cpp +++ b/utils/funcexp/func_week.cpp @@ -75,7 +75,7 @@ int64_t Func_week::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; - // Time adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: aDateTime = static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); diff --git a/utils/funcexp/func_weekday.cpp b/utils/funcexp/func_weekday.cpp index 67f535f1f..9666710f5 100644 --- a/utils/funcexp/func_weekday.cpp +++ b/utils/funcexp/func_weekday.cpp @@ -71,7 +71,7 @@ int64_t Func_weekday::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; - // Time adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: aDateTime = static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); 
diff --git a/utils/funcexp/func_year.cpp b/utils/funcexp/func_year.cpp index 8b3f79fa0..17ff4f2d0 100644 --- a/utils/funcexp/func_year.cpp +++ b/utils/funcexp/func_year.cpp @@ -61,7 +61,7 @@ int64_t Func_year::getIntVal(rowgroup::Row& row, val = parm[0]->data()->getIntVal(row, isNull); return (unsigned)((val >> 48) & 0xffff); - // Time adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: aDateTime = static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); diff --git a/utils/funcexp/func_yearweek.cpp b/utils/funcexp/func_yearweek.cpp index 749491e11..e567440b4 100644 --- a/utils/funcexp/func_yearweek.cpp +++ b/utils/funcexp/func_yearweek.cpp @@ -78,7 +78,7 @@ int64_t Func_yearweek::getIntVal(rowgroup::Row& row, day = (uint32_t)((val >> 38) & 0x3f); break; - // Time adds to now() and then gets value + // Time adds to now() and then gets value case CalpontSystemCatalog::TIME: aDateTime = static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); diff --git a/utils/libmysql_client/libmysql_client.cpp b/utils/libmysql_client/libmysql_client.cpp index 9c67d3fa8..300df8a75 100644 --- a/utils/libmysql_client/libmysql_client.cpp +++ b/utils/libmysql_client/libmysql_client.cpp @@ -119,16 +119,17 @@ int LibMySQL::run(const char* query) void LibMySQL::handleMySqlError(const char* errStr, unsigned int errCode) { - ostringstream oss; - if (mysql->getErrno()) + ostringstream oss; + + if (getErrno()) { - oss << errStr << " (" << mysql->getErrno() << ")"; - oss << " (" << mysql->getErrorMsg() << ")"; + oss << errStr << " (" << getErrno() << ")"; + oss << " (" << getErrorMsg() << ")"; } else { - oss << errStr << " (" << errCode << ")"; - oss << " (unknown)"; + oss << errStr << " (" << errCode << ")"; + oss << " (unknown)"; } throw logging::IDBExcept(oss.str(), logging::ERR_CROSS_ENGINE_CONNECT); diff --git a/utils/libmysql_client/libmysql_client.h 
b/utils/libmysql_client/libmysql_client.h index 7d3c258e7..5720ffd73 100644 --- a/utils/libmysql_client/libmysql_client.h +++ b/utils/libmysql_client/libmysql_client.h @@ -71,8 +71,14 @@ public: { return fErrStr; } - unsigned int getErrno() { return mysql_errno(fCon); } - const char* getErrorMsg() { return mysql_error(fCon); } + unsigned int getErrno() + { + return mysql_errno(fCon); + } + const char* getErrorMsg() + { + return mysql_error(fCon); + } private: MYSQL* fCon; diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index bead74aff..1a28de089 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -2002,7 +2002,7 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu fRow.setLongDoubleField(fRow.getLongDoubleField(colAux + 1) + valIn * valIn, colAux + 1); } -void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, +void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, uint64_t& funcColsIdx) { uint32_t paramCount = fRGContext.getParameterCount(); @@ -2012,6 +2012,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, ConstantColumn* cc; bool bIsNull = false; execplan::CalpontSystemCatalog::ColDataType colDataType; + for (uint32_t i = 0; i < paramCount; ++i) { // If UDAF_IGNORE_NULLS is on, bIsNull gets set the first time @@ -2022,6 +2023,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, ++funcColsIdx; continue; } + SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; // Turn on NULL flags @@ -2030,13 +2032,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, // If this particular parameter is a constant, then we need // to acces the constant value rather than a row value. 
cc = NULL; + if (pFunctionCol->fpConstCol) { cc = dynamic_cast(pFunctionCol->fpConstCol.get()); } if ((cc && cc->type() == ConstantColumn::NULLDATA) - || (!cc && isNull(&fRowGroupIn, rowIn, colIn) == true)) + || (!cc && isNull(&fRowGroupIn, rowIn, colIn) == true)) { if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { @@ -2044,6 +2047,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, ++funcColsIdx; continue; } + dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; } @@ -2055,6 +2059,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, { colDataType = fRowGroupIn.getColTypes()[colIn]; } + if (!(dataFlags[i] & mcsv1sdk::PARAM_IS_NULL)) { switch (colDataType) @@ -2066,6 +2071,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, case execplan::CalpontSystemCatalog::BIGINT: { datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + if (cc) { datum.columnData = cc->getIntVal(const_cast(rowIn), bIsNull); @@ -2078,12 +2084,15 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, datum.scale = fRowGroupIn.getScale()[colIn]; datum.precision = fRowGroupIn.getPrecision()[colIn]; } + break; } + case execplan::CalpontSystemCatalog::DECIMAL: case execplan::CalpontSystemCatalog::UDECIMAL: { datum.dataType = colDataType; + if (cc) { datum.columnData = cc->getDecimalVal(const_cast(rowIn), bIsNull).value; @@ -2096,6 +2105,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, datum.scale = fRowGroupIn.getScale()[colIn]; datum.precision = fRowGroupIn.getPrecision()[colIn]; } + break; } @@ -2106,6 +2116,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, case execplan::CalpontSystemCatalog::UBIGINT: { datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + if (cc) { datum.columnData = cc->getUintVal(const_cast(rowIn), bIsNull); @@ -2114,6 +2125,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t 
colOut, { datum.columnData = rowIn.getUintField(colIn); } + break; } @@ -2121,6 +2133,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, case execplan::CalpontSystemCatalog::UDOUBLE: { datum.dataType = execplan::CalpontSystemCatalog::DOUBLE; + if (cc) { datum.columnData = cc->getDoubleVal(const_cast(rowIn), bIsNull); @@ -2129,6 +2142,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, { datum.columnData = rowIn.getDoubleField(colIn); } + break; } @@ -2136,6 +2150,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, case execplan::CalpontSystemCatalog::UFLOAT: { datum.dataType = execplan::CalpontSystemCatalog::FLOAT; + if (cc) { datum.columnData = cc->getFloatVal(const_cast(rowIn), bIsNull); @@ -2144,12 +2159,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, { datum.columnData = rowIn.getFloatField(colIn); } + break; } case execplan::CalpontSystemCatalog::DATE: { datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + if (cc) { datum.columnData = cc->getDateIntVal(const_cast(rowIn), bIsNull); @@ -2158,11 +2175,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, { datum.columnData = rowIn.getUintField(colIn); } + break; } + case execplan::CalpontSystemCatalog::DATETIME: { datum.dataType = execplan::CalpontSystemCatalog::UBIGINT; + if (cc) { datum.columnData = cc->getDatetimeIntVal(const_cast(rowIn), bIsNull); @@ -2171,12 +2191,14 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, { datum.columnData = rowIn.getUintField(colIn); } + break; } case execplan::CalpontSystemCatalog::TIME: { datum.dataType = execplan::CalpontSystemCatalog::BIGINT; + if (cc) { datum.columnData = cc->getTimeIntVal(const_cast(rowIn), bIsNull); @@ -2185,6 +2207,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, { datum.columnData = rowIn.getIntField(colIn); } + break; } @@ -2196,6 
+2219,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, case execplan::CalpontSystemCatalog::BLOB: { datum.dataType = colDataType; + if (cc) { datum.columnData = cc->getStrVal(const_cast(rowIn), bIsNull); @@ -2204,6 +2228,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, { datum.columnData = rowIn.getStringField(colIn); } + break; } @@ -2221,8 +2246,8 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, // MCOL-1201: If there are multiple parameters, the next fFunctionCols // will have the column used. By incrementing the funcColsIdx (passed by // ref, we also increment the caller's index. - if (fFunctionCols.size() > funcColsIdx + 1 - && fFunctionCols[funcColsIdx+1]->fAggFunction == ROWAGG_MULTI_PARM) + if (fFunctionCols.size() > funcColsIdx + 1 + && fFunctionCols[funcColsIdx + 1]->fAggFunction == ROWAGG_MULTI_PARM) { ++funcColsIdx; SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; @@ -2718,6 +2743,7 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) std::string strOut; bool bSetSuccess = false; + switch (colDataType) { case execplan::CalpontSystemCatalog::BIT: @@ -2732,10 +2758,12 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) intOut = valOut.cast(); bSetSuccess = true; } + if (bSetSuccess) { fRow.setIntField<1>(intOut, colOut); } + break; case execplan::CalpontSystemCatalog::SMALLINT: @@ -2746,6 +2774,7 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) fRow.setIntField<2>(intOut, colOut); bSetSuccess = true; } + break; case execplan::CalpontSystemCatalog::INT: @@ -2759,10 +2788,12 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) intOut = valOut.cast(); bSetSuccess = true; } + if (bSetSuccess) { fRow.setIntField<4>(intOut, colOut); } + break; case execplan::CalpontSystemCatalog::BIGINT: @@ -2774,6 +2805,7 @@ void 
RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) fRow.setIntField<8>(intOut, colOut); bSetSuccess = true; } + break; case execplan::CalpontSystemCatalog::UTINYINT: @@ -2783,6 +2815,7 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) fRow.setUintField<1>(uintOut, colOut); bSetSuccess = true; } + break; case execplan::CalpontSystemCatalog::USMALLINT: @@ -2793,6 +2826,7 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) fRow.setUintField<2>(uintOut, colOut); bSetSuccess = true; } + break; case execplan::CalpontSystemCatalog::UINT: @@ -2802,6 +2836,7 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) fRow.setUintField<4>(uintOut, colOut); bSetSuccess = true; } + break; case execplan::CalpontSystemCatalog::UBIGINT: @@ -2811,6 +2846,7 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) fRow.setUintField<8>(uintOut, colOut); bSetSuccess = true; } + break; case execplan::CalpontSystemCatalog::DATE: @@ -2821,6 +2857,7 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) fRow.setUintField<8>(uintOut, colOut); bSetSuccess = true; } + break; case execplan::CalpontSystemCatalog::FLOAT: @@ -2831,6 +2868,7 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) fRow.setFloatField(floatOut, colOut); bSetSuccess = true; } + break; case execplan::CalpontSystemCatalog::DOUBLE: @@ -2841,6 +2879,7 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) fRow.setDoubleField(doubleOut, colOut); bSetSuccess = true; } + break; case execplan::CalpontSystemCatalog::CHAR: @@ -2852,6 +2891,7 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) fRow.setStringField(strOut, colOut); bSetSuccess = true; } + break; case execplan::CalpontSystemCatalog::VARBINARY: @@ -2863,6 +2903,7 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) 
fRow.setVarBinaryField(strOut, colOut); bSetSuccess = true; } + break; default: @@ -2873,6 +2914,7 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut) break; } } + if (!bSetSuccess) { SetUDAFAnyValue(valOut, colOut); @@ -3404,14 +3446,17 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u fRGContext.setInterrupted(true); throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr); } + #if 0 uint32_t dataFlags[fRGContext.getParameterCount()]; + for (uint32_t i = 0; i < fRGContext.getParameterCount(); ++i) { mcsv1sdk::ColumnDatum& datum = valsIn[i]; // Turn on NULL flags dataFlags[i] = 0; } + #endif // Turn the NULL and CONSTANT flags on. uint32_t flags[1]; @@ -4278,19 +4323,20 @@ void RowAggregationUMP2::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut // colAux(in) - Where the UDAF userdata resides // rowUDAF(in) - pointer to the RowUDAFFunctionCol for this UDAF instance //------------------------------------------------------------------------------ -void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, +void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, uint64_t& funcColsIdx) { static_any::any valOut; // Get the user data - boost::shared_ptr userDataIn = rowIn.getUserData(colIn+1); + boost::shared_ptr userDataIn = rowIn.getUserData(colIn + 1); // Unlike other aggregates, the data isn't in colIn, so testing it for NULL // there won't help. In case of NULL, userData will be NULL. 
uint32_t flags[1]; flags[0] = 0; + if (!userDataIn) { if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index 14e4313cf..b593239cd 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -228,9 +228,9 @@ struct RowUDAFFunctionCol : public RowAggFunctionCol inputColIndex, outputColIndex, auxColIndex), bInterrupted(false) {} - RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : - RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, rhs.fInputColumnIndex, - rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), + RowUDAFFunctionCol(const RowUDAFFunctionCol& rhs) : + RowAggFunctionCol(ROWAGG_UDAF, ROWAGG_FUNCT_UNDEFINE, rhs.fInputColumnIndex, + rhs.fOutputColumnIndex, rhs.fAuxColumnIndex), fUDAFContext(rhs.fUDAFContext), bInterrupted(false) {} @@ -249,6 +249,7 @@ inline void RowAggFunctionCol::serialize(messageqcpp::ByteStream& bs) const bs << (uint8_t)fAggFunction; bs << fInputColumnIndex; bs << fOutputColumnIndex; + if (fpConstCol) { bs << (uint8_t)1; @@ -258,7 +259,7 @@ inline void RowAggFunctionCol::serialize(messageqcpp::ByteStream& bs) const { bs << (uint8_t)0; } - + } inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) @@ -268,6 +269,7 @@ inline void RowAggFunctionCol::deserialize(messageqcpp::ByteStream& bs) bs >> fOutputColumnIndex; uint8_t t; bs >> t; + if (t) { fpConstCol.reset(new ConstantColumn); diff --git a/utils/threadpool/prioritythreadpool.cpp b/utils/threadpool/prioritythreadpool.cpp index c2326a78f..4c043ebbb 100644 --- a/utils/threadpool/prioritythreadpool.cpp +++ b/utils/threadpool/prioritythreadpool.cpp @@ -53,9 +53,9 @@ PriorityThreadPool::PriorityThreadPool(uint targetWeightPerRun, uint highThreads cout << "started " << highThreads << " high, " << midThreads << " med, " << lowThreads << " low.\n"; - defaultThreadCounts[HIGH] = threadCounts[HIGH] = highThreads; - defaultThreadCounts[MEDIUM] = threadCounts[MEDIUM] = 
midThreads; - defaultThreadCounts[LOW] = threadCounts[LOW] = lowThreads; + defaultThreadCounts[HIGH] = threadCounts[HIGH] = highThreads; + defaultThreadCounts[MEDIUM] = threadCounts[MEDIUM] = midThreads; + defaultThreadCounts[LOW] = threadCounts[LOW] = lowThreads; } PriorityThreadPool::~PriorityThreadPool() @@ -76,11 +76,13 @@ void PriorityThreadPool::addJob(const Job& job, bool useLock) threads.create_thread(ThreadHelper(this, HIGH)); threadCounts[HIGH]++; } + if (defaultThreadCounts[MEDIUM] != threadCounts[MEDIUM]) { threads.create_thread(ThreadHelper(this, MEDIUM)); threadCounts[MEDIUM]++; } + if (defaultThreadCounts[LOW] != threadCounts[LOW]) { threads.create_thread(ThreadHelper(this, LOW)); @@ -136,14 +138,15 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() try { - while (!_stop) { + while (!_stop) + { mutex::scoped_lock lk(mutex); queue = pickAQueue(preferredQueue); - if (jobQueues[queue].empty()) { - if (jobQueues[queue].empty()) - { + + if (jobQueues[queue].empty()) + { newJob.wait(lk); continue; } @@ -158,8 +161,8 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() // should leave some to the other threads while ((weight < weightPerRun) && (!jobQueues[queue].empty()) - && (runList.size() <= queueSize/2)) { - { + && (runList.size() <= queueSize / 2)) + { runList.push_back(jobQueues[queue].front()); jobQueues[queue].pop_front(); weight += runList.back().weight; @@ -169,25 +172,24 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() reschedule.resize(runList.size()); rescheduleCount = 0; - for (i = 0; i < runList.size() && !_stop; i++) { - { - try + + for (i = 0; i < runList.size() && !_stop; i++) { - reschedule[i] = false; - running = true; - reschedule[i] = (*(runList[i].functor))(); - running = false; - if (reschedule[i]) - rescheduleCount++; + reschedule[i] = false; + running = true; + reschedule[i] = (*(runList[i].functor))(); + running = false; + + if (reschedule[i]) + 
rescheduleCount++; } - { // no real work was done, prevent intensive busy waiting if (rescheduleCount == runList.size()) usleep(1000); - if (rescheduleCount > 0) { - { + if (rescheduleCount > 0) + { lk.lock(); for (i = 0; i < runList.size(); i++) @@ -205,7 +207,7 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() runList.clear(); } } - catch (std::exception &ex) + catch (std::exception& ex) { // Log the exception and exit this thread try @@ -224,6 +226,7 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() ml.logErrorMessage( message ); #endif + if (running) sendErrorMsg(runList[i].uniqueID, runList[i].stepID, runList[i].sock); } @@ -250,6 +253,7 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() ml.logErrorMessage( message ); #endif + if (running) sendErrorMsg(runList[i].uniqueID, runList[i].stepID, runList[i].sock); } @@ -261,17 +265,17 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw() void PriorityThreadPool::sendErrorMsg(uint32_t id, uint32_t step, primitiveprocessor::SP_UM_IOSOCK sock) { - ISMPacketHeader ism; - PrimitiveHeader ph = {0}; + ISMPacketHeader ism; + PrimitiveHeader ph = {0}; - ism.Status = logging::primitiveServerErr; - ph.UniqueID = id; - ph.StepID = step; - ByteStream msg(sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader)); - msg.append((uint8_t *) &ism, sizeof(ism)); - msg.append((uint8_t *) &ph, sizeof(ph)); + ism.Status = logging::primitiveServerErr; + ph.UniqueID = id; + ph.StepID = step; + ByteStream msg(sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader)); + msg.append((uint8_t*) &ism, sizeof(ism)); + msg.append((uint8_t*) &ph, sizeof(ph)); - sock->write(msg); + sock->write(msg); } void PriorityThreadPool::stop() diff --git a/utils/udfsdk/avgx.cpp b/utils/udfsdk/avgx.cpp index 887a8418e..5af852967 100644 --- a/utils/udfsdk/avgx.cpp +++ b/utils/udfsdk/avgx.cpp @@ -161,9 +161,11 @@ mcsv1_UDAF::ReturnCode avgx::subEvaluate(mcsv1Context* 
context, const UserData* } struct avgx_data* outData = (struct avgx_data*)context->getUserData()->data; + struct avgx_data* inData = (struct avgx_data*)userDataIn->data; outData->sum += inData->sum; + outData->cnt += inData->cnt; return mcsv1_UDAF::SUCCESS; diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index ee08dcc07..b042d63f5 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -120,7 +120,7 @@ bool mcsv1Context::operator==(const mcsv1Context& c) const // We don't test the per row data fields. They don't determine // if it's the same Context. if (getName() != c.getName() - ||fRunFlags != c.fRunFlags + || fRunFlags != c.fRunFlags || fContextFlags != c.fContextFlags || fUserDataSize != c.fUserDataSize || fResultType != c.fResultType diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/udfsdk/regr_avgx.cpp index aec4f361f..e99871f97 100644 --- a/utils/udfsdk/regr_avgx.cpp +++ b/utils/udfsdk/regr_avgx.cpp @@ -82,6 +82,7 @@ mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* { return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. } + if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. Probably redundant { return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
@@ -162,9 +163,11 @@ mcsv1_UDAF::ReturnCode regr_avgx::subEvaluate(mcsv1Context* context, const UserD } struct regr_avgx_data* outData = (struct regr_avgx_data*)context->getUserData()->data; + struct regr_avgx_data* inData = (struct regr_avgx_data*)userDataIn->data; outData->sum += inData->sum; + outData->cnt += inData->cnt; return mcsv1_UDAF::SUCCESS; @@ -182,6 +185,7 @@ mcsv1_UDAF::ReturnCode regr_avgx::evaluate(mcsv1Context* context, static_any::an { valOut = data->sum / (double)data->cnt; } + return mcsv1_UDAF::SUCCESS; } diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp index dc0277ccc..b0b2ebb9c 100644 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -498,159 +498,163 @@ extern "C" */ struct regr_avgx_data { - double sumx; - int64_t cnt; + double sumx; + int64_t cnt; }; - - #ifdef _MSC_VER + +#ifdef _MSC_VER __declspec(dllexport) - #endif +#endif my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) { - struct regr_avgx_data* data; - if (args->arg_count != 2) - { - strcpy(message,"regr_avgx() requires two arguments"); - return 1; - } + struct regr_avgx_data* data; - if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) - { - strmov(message,"Couldn't allocate memory"); - return 1; - } - data->sumx = 0; + if (args->arg_count != 2) + { + strcpy(message, "regr_avgx() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) + { + strmov(message, "Couldn't allocate memory"); + return 1; + } + + data->sumx = 0; data->cnt = 0; - initid->ptr = (char*)data; - return 0; + initid->ptr = (char*)data; + return 0; } - #ifdef _MSC_VER +#ifdef _MSC_VER __declspec(dllexport) - #endif +#endif void regr_avgx_deinit(UDF_INIT* initid) { - free(initid->ptr); - } + free(initid->ptr); + } - #ifdef _MSC_VER +#ifdef _MSC_VER __declspec(dllexport) - #endif +#endif void regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), - 
char* message __attribute__((unused))) + char* message __attribute__((unused))) { - struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; - data->sumx = 0; + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + data->sumx = 0; data->cnt = 0; } - #ifdef _MSC_VER +#ifdef _MSC_VER __declspec(dllexport) - #endif +#endif void regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args, - char* is_null, - char* message __attribute__((unused))) + char* is_null, + char* message __attribute__((unused))) { // TODO test for NULL in x and y - struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; - double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); ++data->cnt; - data->sumx += xval; + data->sumx += xval; } - #ifdef _MSC_VER +#ifdef _MSC_VER __declspec(dllexport) - #endif +#endif long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), - char* is_null, char* error __attribute__((unused))) + char* is_null, char* error __attribute__((unused))) { - struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; - return data->sumx / data->cnt; + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + return data->sumx / data->cnt; } //======================================================================= /** - * avgx connector stub. Exactly the same functionality as the - * built in avg() function. Use to test the performance of the - * API + * avgx connector stub. Exactly the same functionality as the + * built in avg() function. 
Use to test the performance of the + * API */ struct avgx_data { - double sumx; - int64_t cnt; + double sumx; + int64_t cnt; }; - - #ifdef _MSC_VER + +#ifdef _MSC_VER __declspec(dllexport) - #endif +#endif my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) { - struct avgx_data* data; - if (args->arg_count != 1) - { - strcpy(message,"avgx() requires one argument"); - return 1; - } + struct avgx_data* data; - if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) - { - strmov(message,"Couldn't allocate memory"); - return 1; - } - data->sumx = 0; + if (args->arg_count != 1) + { + strcpy(message, "avgx() requires one argument"); + return 1; + } + + if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) + { + strmov(message, "Couldn't allocate memory"); + return 1; + } + + data->sumx = 0; data->cnt = 0; - initid->ptr = (char*)data; - return 0; + initid->ptr = (char*)data; + return 0; } - #ifdef _MSC_VER +#ifdef _MSC_VER __declspec(dllexport) - #endif +#endif void avgx_deinit(UDF_INIT* initid) { - free(initid->ptr); - } + free(initid->ptr); + } - #ifdef _MSC_VER +#ifdef _MSC_VER __declspec(dllexport) - #endif +#endif void avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), - char* message __attribute__((unused))) + char* message __attribute__((unused))) { - struct avgx_data* data = (struct avgx_data*)initid->ptr; - data->sumx = 0; + struct avgx_data* data = (struct avgx_data*)initid->ptr; + data->sumx = 0; data->cnt = 0; } - #ifdef _MSC_VER +#ifdef _MSC_VER __declspec(dllexport) - #endif +#endif void avgx_add(UDF_INIT* initid, UDF_ARGS* args, - char* is_null, - char* message __attribute__((unused))) + char* is_null, + char* message __attribute__((unused))) { // TODO test for NULL in x and y - struct avgx_data* data = (struct avgx_data*)initid->ptr; - double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + struct avgx_data* data = (struct avgx_data*)initid->ptr; + double xval = 
cvtArgToDouble(args->arg_type[1], args->args[0]); ++data->cnt; - data->sumx += xval; + data->sumx += xval; } - #ifdef _MSC_VER +#ifdef _MSC_VER __declspec(dllexport) - #endif +#endif long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), - char* is_null, char* error __attribute__((unused))) + char* is_null, char* error __attribute__((unused))) { - struct avgx_data* data = (struct avgx_data*)initid->ptr; - return data->sumx / data->cnt; + struct avgx_data* data = (struct avgx_data*)initid->ptr; + return data->sumx / data->cnt; } } // vim:ts=4 sw=4: diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index ee48360f1..79ed61b52 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -152,7 +152,7 @@ void WF_udaf::parseParms(const std::vector& parms) { bRespectNulls = true; // The last parms: respect null | ignore null - ConstantColumn* cc = dynamic_cast(parms[parms.size()-1].get()); + ConstantColumn* cc = dynamic_cast(parms[parms.size() - 1].get()); idbassert(cc != NULL); bool isNull = false; // dummy, harded coded bRespectNulls = (cc->getIntVal(fRow, isNull) > 0); @@ -175,9 +175,10 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) // Put the parameter metadata (type, scale, precision) into valsIn mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) { - uint64_t colIn = fFieldIndex[i+1]; + uint64_t colIn = fFieldIndex[i + 1]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; datum.dataType = fRow.getColType(colIn); datum.scale = fRow.getScale(colIn); @@ -196,9 +197,10 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { - uint64_t colIn = fFieldIndex[k+1]; + uint64_t colIn = fFieldIndex[k + 1]; mcsv1sdk::ColumnDatum& datum = valsIn[k]; flags[k] = 0; + if (fRow.isNullValue(colIn) == true) { if (!bRespectNulls) @@ -228,6 +230,7 @@ bool WF_udaf::dropValues(int64_t 
b, int64_t e) datum.columnData = valIn; } + if (bHasNull) { continue; @@ -452,6 +455,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) mcsv1sdk::mcsv1_UDAF::ReturnCode rc; uint64_t colOut = fFieldIndex[0]; bool isNull = false; + if ((fFrameUnit == WF__FRAME_ROWS) || (fPrev == -1) || (!fPeer->operator()(getPointer(fRowData->at(c)), getPointer(fRowData->at(fPrev))))) @@ -469,10 +473,12 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Put the parameter metadata (type, scale, precision) into valsIn mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; ConstantColumn* cc = NULL; + for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) { mcsv1sdk::ColumnDatum& datum = valsIn[i]; cc = static_cast(fConstantParms[i].get()); + if (cc) { datum.dataType = cc->resultType().colDataType; @@ -481,7 +487,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) } else { - uint64_t colIn = fFieldIndex[i+1]; + uint64_t colIn = fFieldIndex[i + 1]; datum.dataType = fRow.getColType(colIn); datum.scale = fRow.getScale(colIn); datum.precision = fRow.getPrecision(colIn); @@ -494,6 +500,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); bool bHasNull = false; + for (int64_t i = b; i <= e; i++) { if (i % 1000 == 0 && fStep->cancelled()) @@ -504,16 +511,18 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // NULL flags uint32_t flags[getContext().getParameterCount()]; bHasNull = false; + for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { cc = static_cast(fConstantParms[k].get()); - uint64_t colIn = fFieldIndex[k+1]; + uint64_t colIn = fFieldIndex[k + 1]; mcsv1sdk::ColumnDatum& datum = valsIn[k]; // Turn on Null flags or skip based on respect nulls flags[k] = 0; + if ((!cc && fRow.isNullValue(colIn) == true) - || (cc && cc->type() == ConstantColumn::NULLDATA)) + || (cc && cc->type() == ConstantColumn::NULLDATA)) { if (!bRespectNulls) { @@ 
-535,6 +544,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) case CalpontSystemCatalog::BIGINT: { int64_t valIn; + if (cc) { valIn = cc->getIntVal(fRow, isNull); @@ -543,6 +553,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { getValue(colIn, valIn); } + // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. if (k == 0) @@ -555,6 +566,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) if (fDistinct) fDistinctSet.insert(valIn); } + datum.columnData = valIn; break; } @@ -563,6 +575,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) case CalpontSystemCatalog::UDECIMAL: { int64_t valIn; + if (cc) { valIn = cc->getDecimalVal(fRow, isNull).value; @@ -571,6 +584,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { getValue(colIn, valIn); } + // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. if (k == 0) @@ -583,6 +597,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) if (fDistinct) fDistinctSet.insert(valIn); } + datum.columnData = valIn; break; } @@ -594,6 +609,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) case CalpontSystemCatalog::UBIGINT: { uint64_t valIn; + if (cc) { valIn = cc->getUintVal(fRow, isNull); @@ -602,6 +618,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { getValue(colIn, valIn); } + // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. 
if (k == 0) @@ -614,6 +631,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) if (fDistinct) fDistinctSet.insert(valIn); } + datum.columnData = valIn; break; } @@ -622,6 +640,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) case CalpontSystemCatalog::UDOUBLE: { double valIn; + if (cc) { valIn = cc->getDoubleVal(fRow, isNull); @@ -630,6 +649,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { getValue(colIn, valIn); } + // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. if (k == 0) @@ -642,6 +662,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) if (fDistinct) fDistinctSet.insert(valIn); } + datum.columnData = valIn; break; } @@ -650,6 +671,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) case CalpontSystemCatalog::UFLOAT: { float valIn; + if (cc) { valIn = cc->getFloatVal(fRow, isNull); @@ -658,6 +680,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { getValue(colIn, valIn); } + // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. if (k == 0) @@ -670,6 +693,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) if (fDistinct) fDistinctSet.insert(valIn); } + datum.columnData = valIn; break; } @@ -681,6 +705,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) case CalpontSystemCatalog::BLOB: { string valIn; + if (cc) { valIn = cc->getStrVal(fRow, isNull); @@ -689,6 +714,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { getValue(colIn, valIn); } + // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. if (k == 0) @@ -701,6 +727,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) if (fDistinct) fDistinctSet.insert(valIn); } + datum.columnData = valIn; break; } @@ -717,13 +744,15 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) } } } + // Skip if any value is NULL and respect nulls is off. 
if (bHasNull) { continue; } + getContext().setDataFlags(flags); - + rc = getContext().getFunction()->nextValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::ERROR) diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index fc3f9006d..ef2ca5853 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -93,7 +93,7 @@ protected: bool bRespectNulls; // respect null | ignore null bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue. // To hold distinct values - std::tr1::unordered_set fDistinctSet; + std::tr1::unordered_set fDistinctSet; static_any::any fValOut; // The return value public: diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index f5598a7e5..dfceb6364 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -645,6 +645,7 @@ void WindowFunctionType::constParms(const std::vector& functionParms) for (size_t i = 0; i < functionParms.size(); ++i) { ConstantColumn* cc = dynamic_cast(functionParms[i].get()); + if (cc) { fConstantParms.push_back(functionParms[i]); diff --git a/utils/windowfunction/windowfunctiontype.h b/utils/windowfunction/windowfunctiontype.h index efa1c548a..5c2f43db0 100644 --- a/utils/windowfunction/windowfunctiontype.h +++ b/utils/windowfunction/windowfunctiontype.h @@ -199,7 +199,7 @@ public: } void constParms(const std::vector& functionParms); - + static boost::shared_ptr makeWindowFunction(const std::string&, int ct, WindowFunctionColumn* wc); protected: diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 754a7b464..923871ef9 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -2247,31 +2247,32 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, { oldHwm = it->hwm; - // save hwm for the old extent - colWidth = colStructList[i].colWidth; - 
succFlag = colOp->calculateRowId(lastRid, BYTE_PER_BLOCK / colWidth, colWidth, curFbo, curBio); + // save hwm for the old extent + colWidth = colStructList[i].colWidth; + succFlag = colOp->calculateRowId(lastRid, BYTE_PER_BLOCK / colWidth, colWidth, curFbo, curBio); - //cout << "insertcolumnrec oid:rid:fbo:oldhwm = " << colStructList[i].dataOid << ":" << lastRid << ":" << curFbo << ":" << oldHwm << endl; - if (succFlag) - { - if ((HWM)curFbo >= oldHwm) + //cout << "insertcolumnrec oid:rid:fbo:oldhwm = " << colStructList[i].dataOid << ":" << lastRid << ":" << curFbo << ":" << oldHwm << endl; + if (succFlag) { - it->hwm = (HWM)curFbo; - } + if ((HWM)curFbo >= oldHwm) + { + it->hwm = (HWM)curFbo; + } - //@Bug 4947. set current to false for old extent. - if (newExtent) - { - it->current = false; - } + //@Bug 4947. set current to false for old extent. + if (newExtent) + { + it->current = false; + } - //cout << "updated old ext info for oid " << colStructList[i].dataOid << " dbroot:part:seg:hwm:current = " - //<< it->dbRoot<<":"<partNum<<":"<segNum<<":"<hwm<<":"<< it->current<< " and newExtent is " << newExtent << endl; - } - else - return ERR_INVALID_PARAM; + //cout << "updated old ext info for oid " << colStructList[i].dataOid << " dbroot:part:seg:hwm:current = " + //<< it->dbRoot<<":"<partNum<<":"<segNum<<":"<hwm<<":"<< it->current<< " and newExtent is " << newExtent << endl; + } + else + return ERR_INVALID_PARAM; } + //update hwm for the new extent if (newExtent) { @@ -2285,7 +2286,8 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, it++; } - colWidth = newColStructList[i].colWidth; + + colWidth = newColStructList[i].colWidth; succFlag = colOp->calculateRowId(lastRidNew, BYTE_PER_BLOCK / colWidth, colWidth, curFbo, curBio); if (succFlag) @@ -2356,29 +2358,29 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, curFbo)); } } - else - return ERR_INVALID_PARAM; - } + else + return ERR_INVALID_PARAM; } + } - // If we create a new 
extent for this batch - for (unsigned i = 0; i < newColStructList.size(); i++) + // If we create a new extent for this batch + for (unsigned i = 0; i < newColStructList.size(); i++) + { + colOp = m_colOp[op(newColStructList[i].fCompressionType)]; + width = newColStructList[i].colWidth; + successFlag = colOp->calculateRowId(lastRidNew, BYTE_PER_BLOCK / width, width, curFbo, curBio); + + if (successFlag) { - colOp = m_colOp[op(newColStructList[i].fCompressionType)]; - width = newColStructList[i].colWidth; - successFlag = colOp->calculateRowId(lastRidNew, BYTE_PER_BLOCK / width, width, curFbo, curBio); - - if (successFlag) + if (curFbo != lastFbo) { - if (curFbo != lastFbo) - { - RETURN_ON_ERROR(AddLBIDtoList(txnid, - lbids, - colDataTypes, - newColStructList[i], - curFbo)); - } + RETURN_ON_ERROR(AddLBIDtoList(txnid, + lbids, + colDataTypes, + newColStructList[i], + curFbo)); } + } else return ERR_INVALID_PARAM; } @@ -5136,7 +5138,7 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, bool versioning) { int rc = 0; - void* valArray = NULL; + void* valArray = NULL; string segFile; Column curCol; ColStructList::size_type totalColumn; @@ -5167,23 +5169,25 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, return rc; TableMetaData* aTbaleMetaData = TableMetaData::makeTableMetaData(tableOid); + if (totalRow1) { valArray = malloc(sizeof(uint64_t) * totalRow1); + for (i = 0; i < totalColumn; i++) { - //@Bug 2205 Check if all rows go to the new extent + //@Bug 2205 Check if all rows go to the new extent //Write the first batch - RID * firstPart = rowIdArray; + RID* firstPart = rowIdArray; ColumnOp* colOp = m_colOp[op(colStructList[i].fCompressionType)]; // set params colOp->initColumn(curCol); // need to pass real dbRoot, partition, and segment to setColParam colOp->setColParam(curCol, 0, colStructList[i].colWidth, - colStructList[i].colDataType, colStructList[i].colType, colStructList[i].dataOid, - colStructList[i].fCompressionType, 
colStructList[i].fColDbRoot, - colStructList[i].fColPartition, colStructList[i].fColSegment); + colStructList[i].colDataType, colStructList[i].colType, colStructList[i].dataOid, + colStructList[i].fCompressionType, colStructList[i].fColDbRoot, + colStructList[i].fColPartition, colStructList[i].fColSegment); ColExtsInfo aColExtsInfo = aTbaleMetaData->getColExtsInfo(colStructList[i].dataOid); ColExtsInfo::iterator it = aColExtsInfo.begin(); @@ -5199,7 +5203,7 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, if (it == aColExtsInfo.end()) //add this one to the list { ColExtInfo aExt; - aExt.dbRoot =colStructList[i].fColDbRoot; + aExt.dbRoot = colStructList[i].fColDbRoot; aExt.partNum = colStructList[i].fColPartition; aExt.segNum = colStructList[i].fColSegment; aExt.compType = colStructList[i].fCompressionType; @@ -5210,18 +5214,18 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, rc = colOp->openColumnFile(curCol, segFile, useTmpSuffix, IO_BUFF_SIZE); // @bug 5572 HDFS tmp file if (rc != NO_ERROR) - break; + break; // handling versioning vector rangeList; if (versioning) { - rc = processVersionBuffer(curCol.dataFile.pFile, txnid, colStructList[i], - colStructList[i].colWidth, totalRow1, firstPart, rangeList); - if (rc != NO_ERROR) { - if (rc != NO_ERROR) - { + rc = processVersionBuffer(curCol.dataFile.pFile, txnid, colStructList[i], + colStructList[i].colWidth, totalRow1, firstPart, rangeList); + + if (rc != NO_ERROR) + { if (colStructList[i].fCompressionType == 0) { curCol.dataFile.pFile->flush(); @@ -5241,39 +5245,39 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, for (size_t j = 0; j < totalRow1; j++) { - uint64_t curValue = colValueList[((totalRow1 + totalRow2)*i) + j]; + uint64_t curValue = colValueList[((totalRow1 + totalRow2) * i) + j]; switch (colStructList[i].colType) { - case WriteEngine::WR_VARBINARY : // treat same as char for now - case WriteEngine::WR_CHAR: - case WriteEngine::WR_BLOB: - case 
WriteEngine::WR_TEXT: + case WriteEngine::WR_VARBINARY : // treat same as char for now + case WriteEngine::WR_CHAR: + case WriteEngine::WR_BLOB: + case WriteEngine::WR_TEXT: ((uint64_t*)valArray)[j] = curValue; break; - case WriteEngine::WR_INT: - case WriteEngine::WR_UINT: - case WriteEngine::WR_FLOAT: + case WriteEngine::WR_INT: + case WriteEngine::WR_UINT: + case WriteEngine::WR_FLOAT: tmp32 = curValue; ((uint32_t*)valArray)[j] = tmp32; break; - case WriteEngine::WR_ULONGLONG: - case WriteEngine::WR_LONGLONG: - case WriteEngine::WR_DOUBLE: - case WriteEngine::WR_TOKEN: + case WriteEngine::WR_ULONGLONG: + case WriteEngine::WR_LONGLONG: + case WriteEngine::WR_DOUBLE: + case WriteEngine::WR_TOKEN: ((uint64_t*)valArray)[j] = curValue; break; - case WriteEngine::WR_BYTE: - case WriteEngine::WR_UBYTE: + case WriteEngine::WR_BYTE: + case WriteEngine::WR_UBYTE: tmp8 = curValue; ((uint8_t*)valArray)[j] = tmp8; break; - case WriteEngine::WR_SHORT: - case WriteEngine::WR_USHORT: + case WriteEngine::WR_SHORT: + case WriteEngine::WR_USHORT: tmp16 = curValue; ((uint16_t*)valArray)[j] = tmp16; break; @@ -5282,11 +5286,11 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, #ifdef PROFILE - timer.start("writeRow "); + timer.start("writeRow "); #endif rc = colOp->writeRow(curCol, totalRow1, firstPart, valArray); #ifdef PROFILE - timer.stop("writeRow "); + timer.stop("writeRow "); #endif colOp->closeColumnFile(curCol); @@ -5295,7 +5299,7 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, // check error if (rc != NO_ERROR) - break; + break; } // end of for (i = 0 From e248920ad0574a478b95176029a591d96f5afd3d Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 22 Jun 2018 21:28:25 +0100 Subject: [PATCH 056/123] MCOL-1378 Add more hardening flags More modern stack and bounds protection flags. Most won't activate until GCC 8 is used but it makes us ready for that. 
--- CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dd1f6ede..30eb38f6c 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,8 +111,8 @@ endif() INCLUDE(check_compiler_flag.cmake) -MY_CHECK_AND_SET_COMPILER_FLAG("-g -O3 -fno-omit-frame-pointer -fno-strict-aliasing -Wall -fno-tree-vectorize -DDBUG_OFF -DHAVE_CONFIG_H" RELEASE RELWITHDEBINFO MINSIZEREL) -MY_CHECK_AND_SET_COMPILER_FLAG("-ggdb3 -fno-omit-frame-pointer -fno-tree-vectorize -DSAFE_MUTEX -DSAFEMALLOC -DENABLED_DEBUG_SYNC -O0 -Wall -D_DEBUG -DHAVE_CONFIG_H" DEBUG) +MY_CHECK_AND_SET_COMPILER_FLAG("-g -O3 -fno-omit-frame-pointer -fno-strict-aliasing -Wall -fno-tree-vectorize -D_GLIBCXX_ASSERTIONS -DDBUG_OFF -DHAVE_CONFIG_H" RELEASE RELWITHDEBINFO MINSIZEREL) +MY_CHECK_AND_SET_COMPILER_FLAG("-ggdb3 -fno-omit-frame-pointer -fno-tree-vectorize -D_GLIBCXX_ASSERTIONS -DSAFE_MUTEX -DSAFEMALLOC -DENABLED_DEBUG_SYNC -O0 -Wall -D_DEBUG -DHAVE_CONFIG_H" DEBUG) # enable security hardening features, like most distributions do # in our benchmarks that costs about ~1% of performance, depending on the load @@ -128,6 +128,10 @@ IF(SECURITY_HARDENED) MY_CHECK_AND_SET_COMPILER_FLAG("-Wl,-z,relro,-z,now") MY_CHECK_AND_SET_COMPILER_FLAG("-fstack-protector --param=ssp-buffer-size=4") MY_CHECK_AND_SET_COMPILER_FLAG("-D_FORTIFY_SOURCE=2" RELEASE RELWITHDEBINFO) + MY_CHECK_AND_SET_COMPILER_FLAG("-fexceptions") + MY_CHECK_AND_SET_COMPILER_FLAG("-mcet -fcf-protection") + MY_CHECK_AND_SET_COMPILER_FLAG("-fstack-protector-strong") + MY_CHECK_AND_SET_COMPILER_FLAG("-fstack-clash-protection") ENDIF() SET (ENGINE_LDFLAGS "-Wl,--no-as-needed -Wl,--add-needed") From 5ddd510c310a41155c21e319fc2e7af7166d6f1e Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Sat, 23 Jun 2018 18:21:47 +0300 Subject: [PATCH 057/123] MCOL-1454 DDL parser supports reserved word TIME as a column name now. 
--- dbcon/ddlpackage/ddl.l | 2 +- dbcon/ddlpackage/ddl.y | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index f3f2e7bfb..9e3b70672 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -120,7 +120,7 @@ CREATE {return CREATE;} CURRENT_USER {return CURRENT_USER;} DATE {ddlget_lval(yyscanner)->str=strdup("date"); return DATE;} DATETIME {return DATETIME;} -TIME {return TIME;} +TIME {ddlget_lval(yyscanner)->str=strdup("time"); return TIME;} DECIMAL {return DECIMAL;} DEC {return DECIMAL;} DEFAULT {return DEFAULT;} diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index e45df1289..4242fe93b 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -110,11 +110,11 @@ DECIMAL DEFAULT DEFERRABLE DEFERRED IDB_DELETE DROP ENGINE FOREIGN FULL IMMEDIATE INDEX INITIALLY IDB_INT INTEGER KEY LONGBLOB LONGTEXT MATCH MAX_ROWS MEDIUMBLOB MEDIUMTEXT MIN_ROWS MODIFY NO NOT NULL_TOK NUMBER NUMERIC ON PARTIAL PRECISION PRIMARY -REFERENCES RENAME RESTRICT SET SMALLINT TABLE TEXT TIME TINYBLOB TINYTEXT +REFERENCES RENAME RESTRICT SET SMALLINT TABLE TEXT TINYBLOB TINYTEXT TINYINT TO UNIQUE UNSIGNED UPDATE USER SESSION_USER SYSTEM_USER VARCHAR VARBINARY VARYING WITH ZONE DOUBLE IDB_FLOAT REAL CHARSET IDB_IF EXISTS CHANGE TRUNCATE -%token DQ_IDENT IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE +%token DQ_IDENT IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE TIME /* Notes: * 1. "ata" stands for alter_table_action @@ -644,7 +644,8 @@ ata_add_column: ; column_name: - DATE + TIME + |DATE |ident ; From 0c113f2bb300f0d1a8310f1a1e9dd6372b69da6b Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Sat, 23 Jun 2018 23:03:39 +0300 Subject: [PATCH 058/123] MCOL-1497 GROUP BY handler supports outer joins now. 
--- dbcon/mysql/ha_calpont_execplan.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 8df06c6b4..94deaf676 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -8089,9 +8089,9 @@ int cp_get_group_plan(THD* thd, SCSEP& csep, cal_impl_if::cal_group_info& gi) gwi.thd = thd; int status = getGroupPlan(gwi, select_lex, csep, gi); -// cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; -// cerr << *csep << endl ; -// cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; + cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; + cerr << *csep << endl ; + cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; if (status > 0) return ER_INTERNAL_ERROR; @@ -8386,7 +8386,9 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro gwi.rcWorkStack.push(new ConstantColumn((int64_t)0, ConstantColumn::NUM)); } - uint32_t failed = buildOuterJoin(gwi, select_lex); + SELECT_LEX tmp_select_lex; + tmp_select_lex.table_list.first = gi.groupByTables; + uint32_t failed = buildOuterJoin(gwi, tmp_select_lex); if (failed) return failed; From 0c98a9e745cc7cf70eed53a9cb2e562622d720e5 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Thu, 21 Jun 2018 14:15:01 +0300 Subject: [PATCH 059/123] MCOL-1457 GROUP BY handler supports aliases of projected columns in ORDER BY list. 
--- dbcon/mysql/ha_calpont_execplan.cpp | 34 +++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 8df06c6b4..ca862c938 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -8089,9 +8089,9 @@ int cp_get_group_plan(THD* thd, SCSEP& csep, cal_impl_if::cal_group_info& gi) gwi.thd = thd; int status = getGroupPlan(gwi, select_lex, csep, gi); -// cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; -// cerr << *csep << endl ; -// cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; + cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; + cerr << *csep << endl ; + cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; if (status > 0) return ER_INTERNAL_ERROR; @@ -9498,6 +9498,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro else { Item* ord_item = *(ordercol->item); + bool nonAggField = true; // ignore not_used column on order by. if (ord_item->type() == Item::INT_ITEM && ord_item->full_name() && string(ord_item->full_name()) == "Not_used") @@ -9506,11 +9507,36 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro rc = gwi.returnedCols[((Item_int*)ord_item)->val_int() - 1]->clone(); else if (ord_item->type() == Item::SUBSELECT_ITEM) gwi.fatalParseError = true; + else if (ordercol->in_field_list && ord_item->type() == Item::FIELD_ITEM) + { + rc = buildReturnedColumn(ord_item, gwi, gwi.fatalParseError); + Item_field* ifp = static_cast(ord_item); + + // The item must be an alias for a projected column + // and extended SELECT list must contain a proper rc + // either aggregation or a field. 
+ if (!rc && ifp->name_length) + { + gwi.fatalParseError = false; + execplan::CalpontSelectExecutionPlan::ReturnedColumnList::iterator iter = gwi.returnedCols.begin(); + AggregateColumn* ac = NULL; + + for ( ; iter != gwi.returnedCols.end(); iter++ ) + { + if ( (*iter).get()->alias() == ord_item->name ) + { + rc = (*iter).get()->clone(); + nonAggField = rc->hasAggregate() ? false : true; + break; + } + } + } + } else rc = buildReturnedColumn(ord_item, gwi, gwi.fatalParseError); // Looking for a match for this item in GROUP BY list. - if ( rc && ord_item->type() == Item::FIELD_ITEM ) + if ( rc && ord_item->type() == Item::FIELD_ITEM && nonAggField) { execplan::CalpontSelectExecutionPlan::ReturnedColumnList::iterator iter = gwi.groupByCols.begin(); From f53cc735175796a288a9899dd13a337f55d9ae94 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Sat, 23 Jun 2018 23:03:39 +0300 Subject: [PATCH 060/123] MCOL-1497 GROUP BY handler supports outer joins now. --- dbcon/mysql/ha_calpont_execplan.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index ca862c938..c0bf47b95 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -8386,7 +8386,9 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro gwi.rcWorkStack.push(new ConstantColumn((int64_t)0, ConstantColumn::NUM)); } - uint32_t failed = buildOuterJoin(gwi, select_lex); + SELECT_LEX tmp_select_lex; + tmp_select_lex.table_list.first = gi.groupByTables; + uint32_t failed = buildOuterJoin(gwi, tmp_select_lex); if (failed) return failed; From 5fe9a9129088793ef8e4c433d21c77b4e2423c07 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Mon, 25 Jun 2018 11:17:50 +0100 Subject: [PATCH 061/123] MCOL-1496 Fix array bug in joiner Found using -D_GLIBCXX_ASSERTIONS --- utils/joiner/tuplejoiner.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/utils/joiner/tuplejoiner.cpp b/utils/joiner/tuplejoiner.cpp index 671e63466..b8dc2937f 100644 --- a/utils/joiner/tuplejoiner.cpp +++ b/utils/joiner/tuplejoiner.cpp @@ -131,7 +131,7 @@ TupleJoiner::TupleJoiner( cpValues.reset(new vector[smallKeyColumns.size()]); for (uint32_t i = 0; i < smallKeyColumns.size(); i++) { discreteValues[i] = false; - if (isUnsigned(smallRG.getColType(i))) + if (isUnsigned(smallRG.getColTypes()[smallKeyColumns[i]])) { cpValues[i].push_back(static_cast(numeric_limits::max())); cpValues[i].push_back(0); @@ -859,7 +859,7 @@ boost::shared_ptr TupleJoiner::copyForDiskJoin() ret->cpValues.reset(new vector[smallKeyColumns.size()]); for (uint32_t i = 0; i < smallKeyColumns.size(); i++) { ret->discreteValues[i] = false; - if (isUnsigned(smallRG.getColType(i))) + if (isUnsigned(smallRG.getColTypes()[smallKeyColumns[i]])) { ret->cpValues[i].push_back(static_cast(numeric_limits::max())); ret->cpValues[i].push_back(0); From 9e3f92b6a3ed1cf213f2d0766968601ddc8df975 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Mon, 25 Jun 2018 15:10:28 +0100 Subject: [PATCH 062/123] MCOL-1330 Make debug flag let valgrind work Add a '-d' flag to WriteEngine, ExeMgr and PrimProc to let valgrind work with them. 
--- exemgr/main.cpp | 4 +++- primitives/primproc/primproc.cpp | 20 +++++++++++++++++++- writeengine/server/we_server.cpp | 19 ++++++++++++++++++- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/exemgr/main.cpp b/exemgr/main.cpp index 8106afb73..92f949f57 100644 --- a/exemgr/main.cpp +++ b/exemgr/main.cpp @@ -1354,7 +1354,9 @@ int main(int argc, char* argv[]) setenv("CALPONT_CSC_IDENT", "um", 1); #endif setupSignalHandlers(); - int err = setupResources(); + int err = 0; + if (!gDebug) + err = setupResources(); string errMsg; switch (err) { diff --git a/primitives/primproc/primproc.cpp b/primitives/primproc/primproc.cpp index b86e2f74d..ed7dcb67d 100644 --- a/primitives/primproc/primproc.cpp +++ b/primitives/primproc/primproc.cpp @@ -299,6 +299,22 @@ int main(int argc, char* argv[]) // This is unset due to the way we start it program_invocation_short_name = const_cast("PrimProc"); + int gDebug = 0; + int c; + + while ((c = getopt(argc, argv, "d")) != EOF) + { + switch(c) + { + case 'd': + gDebug++; + break; + case '?': + default: + break; + } + } + Config* cf = Config::makeConfig(); setupSignalHandlers(); @@ -307,7 +323,9 @@ int main(int argc, char* argv[]) mlp = new primitiveprocessor::Logger(); - int err = setupResources(); + int err = 0; + if (!gDebug) + err = setupResources(); string errMsg; switch (err) { diff --git a/writeengine/server/we_server.cpp b/writeengine/server/we_server.cpp index 9bdb6faa6..b68a81a0b 100644 --- a/writeengine/server/we_server.cpp +++ b/writeengine/server/we_server.cpp @@ -103,6 +103,21 @@ int main(int argc, char** argv) printf ("Locale is : %s\n", systemLang.c_str() ); + int gDebug = 0; + int c; + while ((c = getopt(argc, argv, "d")) != EOF) + { + switch (c) + { + case 'd': + gDebug++; + break; + case '?': + default: + break; + } + } + //set BUSY_INIT state { // Is there a reason to have a seperate Oam instance for this? 
@@ -196,7 +211,9 @@ int main(int argc, char** argv) } } - int err = setupResources(); + int err = 0; + if (!gDebug) + err = setupResources(); string errMsg; switch (err) { From af87f47a8f705befd5c219a5ea31042a4b7a318b Mon Sep 17 00:00:00 2001 From: Ravi Prakash Date: Mon, 25 Jun 2018 10:25:33 -0700 Subject: [PATCH 063/123] Fix MCOL-1155 to correctly process the NOT subtree. --- .gitignore | 2 +- dbcon/mysql/ha_calpont_execplan.cpp | 51 ++++++++++++++++++++++++++--- procmgr/processmanager.cpp | 2 +- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index f8246dc9f..917ffb360 100644 --- a/.gitignore +++ b/.gitignore @@ -106,4 +106,4 @@ install_manifest_platform.txt install_manifest_storage-engine.txt _CPack_Packages columnstoreversion.h - +.idea/ diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index a30d71688..c47bd8d52 100755 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -1358,7 +1358,49 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) idbassert(ifp->argument_count() == 1); ParseTree *ptp = 0; - if (isPredicateFunction(ifp->arguments()[0], gwip) || ifp->arguments()[0]->type() == Item::COND_ITEM) + if (((Item_func*)(ifp->arguments()[0]))->functype() == Item_func::EQUAL_FUNC) + { + // negate it in place + // Note that an EQUAL_FUNC ( a <=> b) was converted to + // ( a = b OR ( a is null AND b is null) ) + // NOT of the above expression is: ( a != b AND (a is not null OR b is not null ) + + if (!gwip->ptWorkStack.empty()) + ptp = gwip->ptWorkStack.top(); + + if (ptp) + { + ParseTree* or_ptp = ptp; + ParseTree* and_ptp = or_ptp->right(); + ParseTree* equal_ptp = or_ptp->left(); + ParseTree* nullck_left_ptp = and_ptp->left(); + ParseTree* nullck_right_ptp = and_ptp->right(); + SimpleFilter *sf = dynamic_cast(nullck_left_ptp->data()); + try + { + sf->op()->reverseOp(); + sf = dynamic_cast(nullck_right_ptp->data()); + sf->op()->reverseOp(); + 
sf = dynamic_cast(equal_ptp->data()); + sf->op()->reverseOp(); + // Rehook the trees + ptp = and_ptp; + ptp->left(equal_ptp); + ptp->right(or_ptp); + or_ptp->left(nullck_left_ptp); + or_ptp->right(nullck_right_ptp); + gwip->ptWorkStack.pop(); + gwip->ptWorkStack.push(ptp); + } + catch (std::exception& ex ) + { + gwip->fatalParseError = true; + gwip->parseErrorText = ex.what(); + return false; + } + } + } + else if (isPredicateFunction(ifp->arguments()[0], gwip) || ifp->arguments()[0]->type() == Item::COND_ITEM) { // negate it in place if (!gwip->ptWorkStack.empty()) @@ -1425,7 +1467,7 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) } else if (ifp->functype() == Item_func::EQUAL_FUNC) { - // a = b OR (a IS NULL AND b IS NULL) + // Convert "a <=> b" to (a = b OR (a IS NULL AND b IS NULL))" idbassert (gwip->rcWorkStack.size() >= 2); ReturnedColumn* rhs = gwip->rcWorkStack.top(); gwip->rcWorkStack.pop(); @@ -1437,7 +1479,7 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) // b IS NULL ConstantColumn *nlhs1 = new ConstantColumn("", ConstantColumn::NULLDATA); sop.reset(new PredicateOperator("isnull")); - sop->setOpType(lhs->resultType(), rhs->resultType()); + sop->setOpType(lhs->resultType(), rhs->resultType()); sfn1 = new SimpleFilter(sop, rhs, nlhs1); ParseTree* ptpl = new ParseTree(sfn1); // a IS NULL @@ -1452,7 +1494,7 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) ptpn->right(ptpr); // a = b sop.reset(new PredicateOperator("=")); - sop->setOpType(lhs->resultType(), lhs->resultType()); + sop->setOpType(lhs->resultType(), rhs->resultType()); sfo = new SimpleFilter(sop, lhs->clone(), rhs->clone()); // OR with the NULL comparison tree ParseTree* ptp = new ParseTree(new LogicOperator("or")); @@ -7017,3 +7059,4 @@ int cp_get_table_plan(THD* thd, SCSEP& csep, cal_table_info& ti) return 0; } } +// vim:ts=4 sw=4: diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 8b01179d2..a6ca886af 100755 --- 
a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -9469,7 +9469,7 @@ int ProcessManager::OAMParentModuleChange() if ( ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM) && ( moduleNameList.size() <= 0 && config.moduleType() == "pm") ) { - int status = 0; + status = 0; } else { From 5ea9cad2d49041cc2f62f59e9e850e8a68e1c15f Mon Sep 17 00:00:00 2001 From: Ravi Prakash Date: Tue, 26 Jun 2018 13:13:59 -0700 Subject: [PATCH 064/123] MCOL-1155 Remove try-catch block by an explicit check for a null pointer. --- dbcon/mysql/ha_calpont_execplan.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index c47bd8d52..6113e7ca4 100755 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -1375,15 +1375,16 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) ParseTree* equal_ptp = or_ptp->left(); ParseTree* nullck_left_ptp = and_ptp->left(); ParseTree* nullck_right_ptp = and_ptp->right(); - SimpleFilter *sf = dynamic_cast(nullck_left_ptp->data()); - try - { - sf->op()->reverseOp(); - sf = dynamic_cast(nullck_right_ptp->data()); - sf->op()->reverseOp(); - sf = dynamic_cast(equal_ptp->data()); - sf->op()->reverseOp(); - // Rehook the trees + SimpleFilter *sf_left_nullck = dynamic_cast(nullck_left_ptp->data()); + SimpleFilter *sf_right_nullck = dynamic_cast(nullck_right_ptp->data()); + SimpleFilter *sf_equal = dynamic_cast(equal_ptp->data()); + + if (sf_left_nullck && sf_right_nullck && sf_equal) { + // Negate the null checks + sf_left_nullck->op()->reverseOp(); + sf_right_nullck->op()->reverseOp(); + sf_equal->op()->reverseOp(); + // Rehook the nodes ptp = and_ptp; ptp->left(equal_ptp); ptp->right(or_ptp); @@ -1392,10 +1393,9 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) gwip->ptWorkStack.pop(); gwip->ptWorkStack.push(ptp); } - catch (std::exception& ex ) - { + else { 
gwip->fatalParseError = true; - gwip->parseErrorText = ex.what(); + gwip->parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_ASSERTION_FAILURE); return false; } } From 476a5e551fe56384e58bf3c1982f5f002ea591c0 Mon Sep 17 00:00:00 2001 From: David Hill Date: Tue, 26 Jun 2018 19:11:57 -0500 Subject: [PATCH 065/123] remove rc.local chmod to 666 --- VERSION | 4 ++-- oam/install_scripts/module_installer.sh | 4 +--- oamapps/postConfigure/installer.cpp | 6 ++++++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/VERSION b/VERSION index 8a397bd3a..52b5353ee 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ COLUMNSTORE_VERSION_MAJOR=1 COLUMNSTORE_VERSION_MINOR=1 -COLUMNSTORE_VERSION_PATCH=6 -COLUMNSTORE_VERSION_RELEASE=1 +COLUMNSTORE_VERSION_PATCH=5 +COLUMNSTORE_VERSION_RELEASE=2 diff --git a/oam/install_scripts/module_installer.sh b/oam/install_scripts/module_installer.sh index 6ca39b70f..8910cca7f 100755 --- a/oam/install_scripts/module_installer.sh +++ b/oam/install_scripts/module_installer.sh @@ -186,7 +186,6 @@ if [ $module = "um" ]; then echo "echo deadline > /sys/block/$scsi_dev/queue/scheduler" >> $RCFILE echo "done" >> $RCFILE else - sudo chmod 666 $RCFILE sudo echo "for scsi_dev in \`mount | awk '/mnt\\/tmp/ {print $1}' | awk -F/ '{print $3}' | sed 's/[0-9]*$//'\`; do" >> $RCFILE sudo echo "echo deadline > /sys/block/$scsi_dev/queue/scheduler" >> $RCFILE sudo echo "done" >> $RCFILE @@ -201,7 +200,6 @@ else echo "echo deadline > /sys/block/$scsi_dev/queue/scheduler" >> $RCFILE echo "done" >> $RCFILE else - sudo chmod 666 $RCFILE sudo echo "for scsi_dev in \`mount | awk '/mnt\\/tmp/ {print $1}' | awk -F/ '{print $3}' | sed 's/[0-9]*$//'\`; do" >> $RCFILE sudo echo "echo deadline > /sys/block/$scsi_dev/queue/scheduler" >> $RCFILE sudo echo "done" >> $RCFILE @@ -214,7 +212,7 @@ fi if [ $user != "root" ]; then echo "uncomment runuser in rc.local" - sudo sed -i -e 's/#sudo runuser/sudo runuser/g' /etc/rc.d/rc.local >/dev/null 2>&1 + sudo sed -i -e 
's/#sudo runuser/sudo runuser/g' $RCFILE >/dev/null 2>&1 fi echo "!!!Module Installation Successfully Completed!!!" diff --git a/oamapps/postConfigure/installer.cpp b/oamapps/postConfigure/installer.cpp index e85fb0989..1cc84ae0f 100644 --- a/oamapps/postConfigure/installer.cpp +++ b/oamapps/postConfigure/installer.cpp @@ -165,6 +165,12 @@ int main(int argc, char *argv[]) if (p && *p) USER = p; + // setup to start on reboot, for non-root amazon installs + if ( !rootUser ) + { + system("sudo sed -i -e 's/#sudo runuser/sudo runuser/g' /etc/rc.d/rc.local >/dev/null 2>&1"); + } + //copy Columnstore.xml.rpmsave if upgrade option is selected if ( installType == "upgrade" ) { cmd = "/bin/cp -f " + installDir + "/etc/Columnstore.xml " + installDir + "/etc/Columnstore.xml.new 2>&1"; From ffb76bb814eeb7bcd911a123a7e388a0e397b3d7 Mon Sep 17 00:00:00 2001 From: David Hill Date: Wed, 27 Jun 2018 14:44:36 -0500 Subject: [PATCH 066/123] MCOL-1467 - changes to get back to 1.1.6 --- VERSION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/VERSION b/VERSION index 52b5353ee..8a397bd3a 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ COLUMNSTORE_VERSION_MAJOR=1 COLUMNSTORE_VERSION_MINOR=1 -COLUMNSTORE_VERSION_PATCH=5 -COLUMNSTORE_VERSION_RELEASE=2 +COLUMNSTORE_VERSION_PATCH=6 +COLUMNSTORE_VERSION_RELEASE=1 From f786cf9c9e420a8f7b9f44ef7e480287b421e8db Mon Sep 17 00:00:00 2001 From: david hill Date: Thu, 28 Jun 2018 10:42:57 -0500 Subject: [PATCH 067/123] MCOL-1145 --- README | 1 + 1 file changed, 1 insertion(+) diff --git a/README b/README index 90421faa8..7e48157db 100644 --- a/README +++ b/README @@ -10,3 +10,4 @@ series are included in this release. Additional features will be pushed in future releases. A few things to notice: - The building of the ColumnStore engine needs a special build environment. We're working on making it available for everyone to build. 
+ From cdd9bed1ae74e65be6081782825225086a5f9ebe Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Tue, 3 Jul 2018 18:22:13 +0300 Subject: [PATCH 068/123] MCOL-1510 GROUP BY supports functions with aggregation funcs as arguments in projections, e.g. sum(i)+1. --- dbcon/mysql/ha_calpont.cpp | 40 +++++++++++++++ dbcon/mysql/ha_calpont.h | 30 +++++------ dbcon/mysql/ha_calpont_execplan.cpp | 77 +++++++++++++++++++++++++---- dbcon/mysql/ha_calpont_impl.cpp | 4 +- dbcon/mysql/ha_calpont_impl_if.h | 10 ++-- 5 files changed, 129 insertions(+), 32 deletions(-) diff --git a/dbcon/mysql/ha_calpont.cpp b/dbcon/mysql/ha_calpont.cpp index 79efdb88e..8d3996fe2 100644 --- a/dbcon/mysql/ha_calpont.cpp +++ b/dbcon/mysql/ha_calpont.cpp @@ -1170,6 +1170,46 @@ create_calpont_group_by_handler(THD* thd, Query* query) return handler; } +/*********************************************************** + * DESCRIPTION: + * GROUP BY handler constructor + * PARAMETERS: + * thd - THD pointer. + * query - Query describing structure + ***********************************************************/ +ha_calpont_group_by_handler::ha_calpont_group_by_handler(THD* thd_arg, Query* query) + : group_by_handler(thd_arg, calpont_hton), + select(query->select), + table_list(query->from), + distinct(query->distinct), + where(query->where), + group_by(query->group_by), + order_by(query->order_by), + having(query->having) +{ + List_iterator_fast item_iter(*select); + Item* item; + char* str = NULL; + while((item = item_iter++)) + { + String descr; + item->print(&descr, QT_ORDINARY); + str = new char[descr.length()+1]; + strncpy(str, descr.ptr(), descr.length()); + str[descr.length()] = '\0'; + select_list_descr.push_back(str); + } +} + +/*********************************************************** + * DESCRIPTION: + * GROUP BY destructor + ***********************************************************/ +ha_calpont_group_by_handler::~ha_calpont_group_by_handler() +{ + select_list_descr.delete_elements(); +} + 
/*********************************************************** * DESCRIPTION: * Makes the plan and prepares the data diff --git a/dbcon/mysql/ha_calpont.h b/dbcon/mysql/ha_calpont.h index bcbcdc5da..3c6f7e49e 100644 --- a/dbcon/mysql/ha_calpont.h +++ b/dbcon/mysql/ha_calpont.h @@ -255,12 +255,16 @@ public: * One should read comments in server/sql/group_by_handler.h * Attributes: * select - attribute contains all GROUP BY, HAVING, ORDER items and calls it - * an extended SELECT list accordin to comments in - * server/sql/group_handler.cc. - * So the temporary table for - * select count(*) from b group by a having a > 3 order by a - * will have 4 columns not 1. - * However server ignores all NULLs used in GROUP BY, HAVING, ORDER. + * an extended SELECT list according to comments in + * server/sql/group_handler.cc. + * So the temporary table for + * select count(*) from b group by a having a > 3 order by a + * will have 4 columns not 1. + * However server ignores all NULLs used in + * GROUP BY, HAVING, ORDER. + * select_list_descr - contains Item description returned by Item->print() + * that is used in lookup for corresponding columns in + * extended SELECT list. * table_list - contains all tables involved. Must be CS tables only. * distinct - looks like a useless thing for now. Couldn't get it set by server. * where - where items. 
@@ -275,22 +279,14 @@ public: class ha_calpont_group_by_handler: public group_by_handler { public: - ha_calpont_group_by_handler(THD* thd_arg, Query* query) - : group_by_handler(thd_arg, calpont_hton), - select(query->select), - table_list(query->from), - distinct(query->distinct), - where(query->where), - group_by(query->group_by), - order_by(query->order_by), - having(query->having) - { } - ~ha_calpont_group_by_handler() { } + ha_calpont_group_by_handler(THD* thd_arg, Query* query); + ~ha_calpont_group_by_handler(); int init_scan(); int next_row(); int end_scan(); List* select; + List select_list_descr; TABLE_LIST* table_list; bool distinct; Item* where; diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index c0bf47b95..b06c197aa 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -42,6 +42,9 @@ #include #include #include + +#include + using namespace std; #include @@ -188,6 +191,24 @@ bool nonConstFunc(Item_func* ifp) return false; } +ReturnedColumn* findCorrespTempField(Item_ref* item, gp_walk_info& gwi) +{ + ReturnedColumn* result = NULL; + uint32_t i; + for (i = 0; i < gwi.returnedCols.size(); i++) + { + if (item->ref[0] && item->ref[0]->name && + gwi.returnedCols[i]->alias().c_str() && + !strcasecmp(item->ref[0]->name, gwi.returnedCols[i]->alias().c_str())) + { + result = gwi.returnedCols[i]->clone(); + break; + } + } + + return result; +} + string getViewName(TABLE_LIST* table_ptr) { string viewName = ""; @@ -2739,7 +2760,7 @@ CalpontSystemCatalog::ColType colType_MysqlToIDB (const Item* item) return ct; } -ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport) +ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport, bool pushdownHand) { ReturnedColumn* rc = NULL; @@ -2864,7 +2885,7 @@ ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupp } if (func_name == "+" || func_name == "-" || func_name 
== "*" || func_name == "/" ) - return buildArithmeticColumn(ifp, gwi, nonSupport); + return buildArithmeticColumn(ifp, gwi, nonSupport, pushdownHand); else return buildFunctionColumn(ifp, gwi, nonSupport); } @@ -2998,7 +3019,11 @@ ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupp return rc; } -ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport) +ArithmeticColumn* buildArithmeticColumn( + Item_func* item, + gp_walk_info& gwi, + bool& nonSupport, + bool pushdownHand) { if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -3021,7 +3046,7 @@ ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool { if (gwi.clauseType == SELECT || /*gwi.clauseType == HAVING || */gwi.clauseType == GROUP_BY || gwi.clauseType == FROM) // select list { - lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport)); + lhs = new ParseTree(buildReturnedColumn(sfitempp[0], gwi, nonSupport, pushdownHand)); if (!lhs->data() && (sfitempp[0]->type() == Item::FUNC_ITEM)) { @@ -3029,8 +3054,15 @@ ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool Item_func* ifp = (Item_func*)sfitempp[0]; lhs = buildParseTree(ifp, gwi, nonSupport); } - - rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport)); + else if(pushdownHand && !lhs->data() && (sfitempp[0]->type() == Item::REF_ITEM)) + { + // There must be an aggregation column in extended SELECT + // list so find the corresponding column. 
+ ReturnedColumn* rc = findCorrespTempField(static_cast(sfitempp[0]), gwi); + if(rc) + lhs = new ParseTree(rc); + } + rhs = new ParseTree(buildReturnedColumn(sfitempp[1], gwi, nonSupport, pushdownHand)); if (!rhs->data() && (sfitempp[1]->type() == Item::FUNC_ITEM)) { @@ -3038,6 +3070,14 @@ ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool Item_func* ifp = (Item_func*)sfitempp[1]; rhs = buildParseTree(ifp, gwi, nonSupport); } + else if(pushdownHand && !rhs->data() && (sfitempp[1]->type() == Item::REF_ITEM)) + { + // There must be an aggregation column in extended SELECT + // list so find the corresponding column. + ReturnedColumn* rc = findCorrespTempField(static_cast(sfitempp[1]), gwi); + if(rc) + rhs = new ParseTree(rc); + } } else // where clause { @@ -3198,7 +3238,11 @@ ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool return ac; } -ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport) +ReturnedColumn* buildFunctionColumn( + Item_func* ifp, + gp_walk_info& gwi, + bool& nonSupport, + bool pushdownHand) { if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -3239,7 +3283,7 @@ ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& non // Arithmetic exp if (funcName == "+" || funcName == "-" || funcName == "*" || funcName == "/" ) { - ArithmeticColumn* ac = buildArithmeticColumn(ifp, gwi, nonSupport); + ArithmeticColumn* ac = buildArithmeticColumn(ifp, gwi, nonSupport, pushdownHand); return ac; } @@ -8087,6 +8131,7 @@ int cp_get_group_plan(THD* thd, SCSEP& csep, cal_impl_if::cal_group_info& gi) SELECT_LEX select_lex = lex->select_lex; gp_walk_info gwi; gwi.thd = thd; + gwi.groupByAuxDescr = gi.groupByAuxDescr; int status = getGroupPlan(gwi, select_lex, csep, gi); cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; @@ -8468,6 +8513,8 @@ int 
getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro string sel_cols_in_create; string sel_cols_in_select; bool redo = false; + List_iterator_fast itDescr(*gi.groupByAuxDescr); + char* fieldDescr; // empty rcWorkStack and ptWorkStack. They should all be empty by now. clearStacks(gwi); @@ -8486,7 +8533,15 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro while ((item = it++)) { - string itemAlias = (item->name ? item->name : ""); + // Given the size of gi.groupByAuxDescr is equal to gi.groupByFields + fieldDescr = itDescr++; + string itemAlias; + if(item->name) + itemAlias = (item->name); + else + { + itemAlias = (fieldDescr ? fieldDescr: ""); + } // @bug 5916. Need to keep checking until getting concret item in case // of nested view. @@ -8592,6 +8647,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } + if(!ac->alias().length()) + ac->alias(fieldDescr); // add this agg col to returnedColumnList boost::shared_ptr spac(ac); gwi.returnedCols.push_back(spac); @@ -8651,7 +8708,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - ReturnedColumn* rc = buildFunctionColumn(ifp, gwi, hasNonSupportItem); + ReturnedColumn* rc = buildFunctionColumn(ifp, gwi, hasNonSupportItem, true); SRCP srcp(rc); if (rc) diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 3d4ee6ac3..9a018068d 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -5265,6 +5265,7 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE // MCOL-1052 Send Items lists down to the optimizer. 
gi.groupByTables = group_hand->table_list; gi.groupByFields = group_hand->select; + gi.groupByAuxDescr = &group_hand->select_list_descr; gi.groupByWhere = group_hand->where; gi.groupByGroup = group_hand->group_by; gi.groupByOrder = group_hand->order_by; @@ -5831,7 +5832,8 @@ int ha_calpont_impl_group_by_end(ha_calpont_group_by_handler* group_hand, TABLE* try { - sm::tpl_close(ti.tpl_ctx, &hndl, ci->stats); + if(hndl) + sm::tpl_close(ti.tpl_ctx, &hndl, ci->stats); ci->cal_conn_hndl = hndl; diff --git a/dbcon/mysql/ha_calpont_impl_if.h b/dbcon/mysql/ha_calpont_impl_if.h index cb603ca49..40e746917 100644 --- a/dbcon/mysql/ha_calpont_impl_if.h +++ b/dbcon/mysql/ha_calpont_impl_if.h @@ -142,6 +142,7 @@ struct gp_walk_info std::map derivedTbFilterMap; uint32_t derivedTbCnt; std::vector subselectList; + List* groupByAuxDescr; // Kludge for Bug 750 int32_t recursionLevel; @@ -195,6 +196,7 @@ struct cal_table_info struct cal_group_info { cal_group_info() : groupByFields(0), + groupByAuxDescr(0), groupByTables(0), groupByWhere(0), groupByGroup(0), @@ -205,6 +207,7 @@ struct cal_group_info ~cal_group_info() { } List* groupByFields; // MCOL-1052 SELECT + List* groupByAuxDescr; //MCOL-1052 Auxilary column descriptions TABLE_LIST* groupByTables; // MCOL-1052 FROM Item* groupByWhere; // MCOL-1052 WHERE ORDER* groupByGroup; // MCOL-1052 GROUP BY @@ -327,14 +330,13 @@ void setError(THD* thd, uint32_t errcode, const std::string errmsg, gp_walk_info void setError(THD* thd, uint32_t errcode, const std::string errmsg); void gp_walk(const Item* item, void* arg); void parse_item (Item* item, std::vector& field_vec, bool& hasNonSupportItem, uint16& parseInfo); -execplan::ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport); const std::string bestTableName(const Item_field* ifp); bool isInfiniDB(TABLE* table_ptr); // execution plan util functions prototypes -execplan::ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport); 
-execplan::ReturnedColumn* buildFunctionColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport); -execplan::ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport); +execplan::ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupport, bool pushdownHand = false); +execplan::ReturnedColumn* buildFunctionColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport, bool pushdownHand = false); +execplan::ArithmeticColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& nonSupport, bool pushdownHand = false); execplan::ConstantColumn* buildDecimalColumn(Item* item, gp_walk_info& gwi); execplan::SimpleColumn* buildSimpleColumn(Item_field* item, gp_walk_info& gwi); execplan::FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonSupport); From 77b52a6a32ff96e2c99695c93d5d8567861bb379 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Wed, 23 May 2018 22:31:21 +0300 Subject: [PATCH 069/123] MCOL-1446 CS sorting direction aligned with the server`s. --- dbcon/joblist/limitedorderby.cpp | 70 ++++++++++++++++++++++---------- utils/rowgroup/rowgroup.h | 9 +++- 2 files changed, 57 insertions(+), 22 deletions(-) diff --git a/dbcon/joblist/limitedorderby.cpp b/dbcon/joblist/limitedorderby.cpp index 4f3366cc6..7da1accfe 100644 --- a/dbcon/joblist/limitedorderby.cpp +++ b/dbcon/joblist/limitedorderby.cpp @@ -171,15 +171,18 @@ void LimitedOrderBy::processRow(const rowgroup::Row& row) } } - +/* + * The f() copies top element from an ordered queue into a row group. It + * does this backwards to syncronise sorting orientation with the server. + * The top row from the queue goes last into the returned set. 
+ */ void LimitedOrderBy::finalize() { + queue tempQueue; if (fRowGroup.getRowCount() > 0) fDataQueue.push(fData); - // MCOL-1052 The removed check effectively disables sorting, - // since fStart = 0 if there is no OFFSET; - if (true) + if (fOrderByQueue.size() > 0) { uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize(); fMemSize += newSize; @@ -190,27 +193,49 @@ void LimitedOrderBy::finalize() << " @" << __FILE__ << ":" << __LINE__; throw IDBExcept(fErrorCode); } - + + uint64_t offset = 0; + uint64_t i = 0; + list tempRGDataList; + + // Skip first LIMIT rows in the the RowGroup + if ( fCount <= fOrderByQueue.size() ) + { + offset = fCount % fRowsPerRG; + if(!offset && fCount > 0) + offset = fRowsPerRG; + } + else + { + offset = fOrderByQueue.size() % fRowsPerRG; + if(!offset && fOrderByQueue.size() > 0) + offset = fRowsPerRG; + } + + list::iterator tempListIter = tempRGDataList.begin(); + + i = 0; + uint32_t rSize = fRow0.getSize(); + uint64_t preLastRowNumb = fRowsPerRG - 1; fData.reinit(fRowGroup, fRowsPerRG); fRowGroup.setData(&fData); fRowGroup.resetRowGroup(0); - fRowGroup.getRow(0, &fRow0); - queue tempQueue; - uint64_t i = 0; - + offset = offset != 0 ? 
offset - 1 : offset; + fRowGroup.getRow(offset, &fRow0); + while ((fOrderByQueue.size() > fStart) && (i++ < fCount)) { const OrderByRow& topRow = fOrderByQueue.top(); row1.setData(topRow.fData); copyRow(row1, &fRow0); - //memcpy(fRow0.getData(), topRow.fData, fRow0.getSize()); fRowGroup.incRowCount(); - fRow0.nextRow(); + offset--; + fRow0.prevRow(rSize); fOrderByQueue.pop(); - if (fRowGroup.getRowCount() >= fRowsPerRG) + if(offset == (uint64_t)-1) { - tempQueue.push(fData); + tempRGDataList.push_front(fData); fMemSize += newSize; if (!fRm->getMemory(newSize, fSessionMemLimit)) @@ -219,18 +244,21 @@ void LimitedOrderBy::finalize() << " @" << __FILE__ << ":" << __LINE__; throw IDBExcept(fErrorCode); } - - fData.reinit(fRowGroup, fRowsPerRG); - //fData.reset(new uint8_t[fRowGroup.getDataSize(fRowsPerRG)]); + + fData.reinit(fRowGroup, fRowsPerRG); fRowGroup.setData(&fData); - fRowGroup.resetRowGroup(0); - fRowGroup.getRow(0, &fRow0); + fRowGroup.resetRowGroup(0); // ? + fRowGroup.getRow(preLastRowNumb, &fRow0); + offset = preLastRowNumb; } } - + // Push the last/only group into the queue. 
if (fRowGroup.getRowCount() > 0) - tempQueue.push(fData); - + tempRGDataList.push_front(fData); + + for(tempListIter = tempRGDataList.begin(); tempListIter != tempRGDataList.end(); tempListIter++) + tempQueue.push(*tempListIter); + fDataQueue = tempQueue; } } diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 896da1f4a..f1fc39dcc 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -355,7 +355,8 @@ public: */ template void setUintField_offset(uint64_t val, uint32_t offset); inline void nextRow(uint32_t size); - + inline void prevRow(uint32_t size, uint64_t number); + inline void setUintField(uint64_t val, uint32_t colIndex); template void setIntField(int64_t, uint32_t colIndex); inline void setIntField(int64_t, uint32_t colIndex); @@ -896,6 +897,12 @@ inline void Row::nextRow(uint32_t size) data += size; } + +inline void Row::prevRow(uint32_t size, uint64_t number = 1) +{ + data -= size * number; +} + template inline void Row::setUintField(uint64_t val, uint32_t colIndex) { From 58dcdb341a2a3579b18e77d8a26e03e02a999858 Mon Sep 17 00:00:00 2001 From: David Hill Date: Thu, 5 Jul 2018 11:03:07 -0500 Subject: [PATCH 070/123] MCOL-1525 - fix nonroot corefile generating issue --- oam/install_scripts/columnstore | 6 +----- oam/install_scripts/post-install | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/oam/install_scripts/columnstore b/oam/install_scripts/columnstore index 038ad0c43..15c00cea5 100644 --- a/oam/install_scripts/columnstore +++ b/oam/install_scripts/columnstore @@ -94,13 +94,9 @@ start() { CoreFileFlag=`$InstallDir/bin/getConfig -c $InstallDir/etc/Columnstore.xml Installation CoreFileFlag` if [ $CoreFileFlag = "y" ]; then - SUDO= - if [ "$user" != "root" ]; then - SUDO="$SUDO" - fi #columnstore core files - $SUDO ulimit -c unlimited > /dev/null 2>&1 + ulimit -c unlimited > /dev/null 2>&1 $SUDO sysctl -q -w kernel.core_uses_pid=1 > /dev/null 2>&1 $SUDO sysctl -q -w 
kernel.core_pattern=/var/log/mariadb/columnstore/corefiles/core.%e.%p > /dev/null 2>&1 fi diff --git a/oam/install_scripts/post-install b/oam/install_scripts/post-install index f7aeeb2ca..4eaa28dda 100755 --- a/oam/install_scripts/post-install +++ b/oam/install_scripts/post-install @@ -164,7 +164,7 @@ mkdir -p /tmp/columnstore_tmp_files >/dev/null 2>&1 #setup core file directory and link mkdir /var/log/mariadb/columnstore/corefiles > /dev/null 2>&1 -chmod 755 /var/log/mariadb/columnstore/corefiles > /dev/null 2>&1 +chmod 777 /var/log/mariadb/columnstore/corefiles > /dev/null 2>&1 #create mount directories mkdir /mnt/tmp > /dev/null 2>&1 From d73d1bb8c1d8d731dd8111082a47b3d3a6cf75b5 Mon Sep 17 00:00:00 2001 From: David Hill Date: Thu, 5 Jul 2018 13:57:27 -0500 Subject: [PATCH 071/123] fix issue with systemname save --- oamapps/postConfigure/postConfigure.cpp | 442 +++++++++++++----------- 1 file changed, 245 insertions(+), 197 deletions(-) diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 4225195de..cd17b880c 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ + MA 02110-1301, USA. 
*/ /****************************************************************************************** * $Id: postConfigure.cpp 64 2006-10-12 22:21:51Z dhill $ @@ -112,7 +112,7 @@ bool makeRClocal(string moduleType, string moduleName, int IserverTypeInstall); bool createDbrootDirs(string DBRootStorageType); bool pkgCheck(std::string columnstorePackage); bool storageSetup(bool amazonInstall); -void setSystemName(); +void setSystemName(bool single_server_quick_install = false); bool singleServerDBrootSetup(); bool copyFstab(string moduleName); bool attachVolume(string instanceName, string volumeName, string deviceName, string dbrootPath); @@ -219,6 +219,8 @@ int main(int argc, char *argv[]) noPrompting = false; string password; string cmd; + bool single_server_quick_install = false; + // struct sysinfo myinfo; // hidden options @@ -280,33 +282,36 @@ int main(int argc, char *argv[]) cout << "will perform a Package Installation of all of the Modules within the" << endl; cout << "System that is being configured." << endl; cout << endl; - cout << "IMPORTANT: This tool should only be run on a Performance Module Server," << endl; - cout << " preferably Module #1" << endl; + cout << "IMPORTANT: This tool is required to run on a Performance Module #1 (pm1) Server." 
<< endl; cout << endl; cout << "Instructions:" << endl << endl; cout << " Press 'enter' to accept a value in (), if available or" << endl; cout << " Enter one of the options within [], if available, or" << endl; cout << " Enter a new value" << endl << endl; cout << endl; - cout << "Usage: postConfigure [-h][-c][-u][-p][-s][-port][-i][-n]" << endl; + cout << "Usage: postConfigure [-h][-c][-u][-p][-sq][-port][-i][-n][-sn]" << endl; cout << " -h Help" << endl; cout << " -c Config File to use to extract configuration data, default is Columnstore.xml.rpmsave" << endl; cout << " -u Upgrade, Install using the Config File from -c, default to Columnstore.xml.rpmsave" << endl; cout << " If ssh-keys aren't setup, you should provide passwords as command line arguments" << endl; cout << " -p Unix Password, used with no-prompting option" << endl; - cout << " -s Single Threaded Remote Install" << endl; + cout << " -sq Single Server Quick Install" << endl; cout << " -port MariaDB ColumnStore Port Address" << endl; cout << " -i Non-root Install directory, Only use for non-root installs" << endl; cout << " -n Non-distributed install, meaning it will not install the remote nodes" << endl; + cout << " -sn System Name" << endl; exit (0); } - else if( string("-s") == argv[i] ) - thread_remote_installer = false; - else if( string("-f") == argv[i] ) - nodeps = "--nodeps"; - else if( string("-o") == argv[i] ) - startOfflinePrompt = true; - else if( string("-c") == argv[i] ) { + else if( string("-sq") == argv[i] ) + { + single_server_quick_install = true; + noPrompting = true; + } + else if( string("-f") == argv[i] ) + nodeps = "--nodeps"; + else if( string("-o") == argv[i] ) + startOfflinePrompt = true; + else if( string("-c") == argv[i] ) { i++; if (i >= argc ) { cout << " ERROR: Config File not provided" << endl; @@ -355,6 +360,14 @@ int main(int argc, char *argv[]) exit (1); } installDir = argv[i]; + } + else if( string("-sn") == argv[i] ) { + i++; + if (i >= argc ) { + cout << " ERROR: 
System-name not provided" << endl; + exit (1); + } + systemName = argv[i]; } else { @@ -379,41 +392,43 @@ int main(int argc, char *argv[]) cout << "Installation of all of the Servers within the System that is being configured." << endl; cout << endl; - cout << "IMPORTANT: This tool should only be run on the Parent OAM Module" << endl; - cout << " which is a Performance Module, preferred Module #1" << endl; + cout << "IMPORTANT: This tool requires to run on the Performance Module #1" << endl; cout << endl; - if (!noPrompting) { - cout << "Prompting instructions:" << endl << endl; - cout << " Press 'enter' to accept a value in (), if available or" << endl; - cout << " Enter one of the options within [], if available, or" << endl; - cout << " Enter a new value" << endl << endl; - } - else + if (!single_server_quick_install) { - //get current time and date - time_t now; - now = time(NULL); - struct tm tm; - localtime_r(&now, &tm); - char timestamp[200]; - strftime (timestamp, 200, "%m:%d:%y-%H:%M:%S", &tm); - string currentDate = timestamp; + if (!noPrompting) { + cout << "Prompting instructions:" << endl << endl; + cout << " Press 'enter' to accept a value in (), if available or" << endl; + cout << " Enter one of the options within [], if available, or" << endl; + cout << " Enter a new value" << endl << endl; + } + else + { + //get current time and date + time_t now; + now = time(NULL); + struct tm tm; + localtime_r(&now, &tm); + char timestamp[200]; + strftime (timestamp, 200, "%m:%d:%y-%H:%M:%S", &tm); + string currentDate = timestamp; - string postConfigureLog = "/var/log/columnstore-postconfigure-" + currentDate; + string postConfigureLog = "/var/log/columnstore-postconfigure-" + currentDate; - cout << "With the no-Prompting Option being specified, you will be required to have the following:" << endl; - cout << endl; - cout << " 1. Root user ssh keys setup between all nodes in the system or" << endl; - cout << " use the password command line option." 
<< endl; - cout << " 2. A Configure File to use to retrieve configure data, default to Columnstore.xml.rpmsave" << endl; - cout << " or use the '-c' option to point to a configuration file." << endl; - cout << endl; -// cout << " Output if being redirected to " << postConfigureLog << endl; + cout << "With the no-Prompting Option being specified, you will be required to have the following:" << endl; + cout << endl; + cout << " 1. Root user ssh keys setup between all nodes in the system or" << endl; + cout << " use the password command line option." << endl; + cout << " 2. A Configure File to use to retrieve configure data, default to Columnstore.xml.rpmsave" << endl; + cout << " or use the '-c' option to point to a configuration file." << endl; + cout << endl; + // cout << " Output if being redirected to " << postConfigureLog << endl; -// redirectStandardOutputToFile(postConfigureLog, false ); + // redirectStandardOutputToFile(postConfigureLog, false ); + } } - + //check if MariaDB ColumnStore is up and running if (oam.checkSystemRunning()) { cout << "MariaDB ColumnStore is running, can't run postConfigure while MariaDB ColumnStore is running. Exiting.." 
<< endl; @@ -421,45 +436,51 @@ int main(int argc, char *argv[]) } //check Config saved files - if ( !checkSaveConfigFile()) + if (!single_server_quick_install) { - cout << "ERROR: Configuration File not setup" << endl; - exit(1); + if ( !checkSaveConfigFile()) + { + cout << "ERROR: Configuration File not setup" << endl; + exit(1); + } } - + //determine package type string EEPackageType; - if (!rootUser) - EEPackageType = "binary"; - else - { - int rtnCode = system("rpm -qi mariadb-columnstore-platform > /tmp/columnstore.txt 2>&1"); - if (WEXITSTATUS(rtnCode) == 0) - EEPackageType = "rpm"; - else { - rtnCode = system("dpkg -s mariadb-columnstore-platform > /tmp/columnstore.txt 2>&1"); - if (WEXITSTATUS(rtnCode) == 0) - EEPackageType = "deb"; - else - EEPackageType = "binary"; - } - } + if (single_server_quick_install) + { + if (!rootUser) + EEPackageType = "binary"; + else + { + int rtnCode = system("rpm -qi mariadb-columnstore-platform > /tmp/columnstore.txt 2>&1"); + if (WEXITSTATUS(rtnCode) == 0) + EEPackageType = "rpm"; + else { + rtnCode = system("dpkg -s mariadb-columnstore-platform > /tmp/columnstore.txt 2>&1"); + if (WEXITSTATUS(rtnCode) == 0) + EEPackageType = "deb"; + else + EEPackageType = "binary"; + } + } - try { - sysConfig->setConfig(InstallSection, "EEPackageType", EEPackageType); - } - catch(...) - { - cout << "ERROR: Problem setting EEPackageType from the MariaDB ColumnStore System Configuration file" << endl; - exit(1); - } + try { + sysConfig->setConfig(InstallSection, "EEPackageType", EEPackageType); + } + catch(...) 
+ { + cout << "ERROR: Problem setting EEPackageType from the MariaDB ColumnStore System Configuration file" << endl; + exit(1); + } - if ( !writeConfig(sysConfig) ) { - cout << "ERROR: Failed trying to update MariaDB ColumnStore System Configuration file" << endl; - exit(1); + if ( !writeConfig(sysConfig) ) { + cout << "ERROR: Failed trying to update MariaDB ColumnStore System Configuration file" << endl; + exit(1); + } } - + //check for local ip address as pm1 ModuleConfig moduleconfig; @@ -551,7 +572,6 @@ int main(int argc, char *argv[]) catch(...) {} - // run my.cnf upgrade script if ( reuseConfig == "y" ) { @@ -615,127 +635,148 @@ int main(int argc, char *argv[]) } cout << endl; + + if (single_server_quick_install) + { + cout << "===== Quick Single-Server Install Configuration =====" << endl << endl; - cout << "===== Setup System Server Type Configuration =====" << endl << endl; - - cout << "There are 2 options when configuring the System Server Type: single and multi" << endl << endl; - cout << " 'single' - Single-Server install is used when there will only be 1 server configured" << endl; - cout << " on the system. It can also be used for production systems, if the plan is" << endl; - cout << " to stay single-server." << endl << endl; - cout << " 'multi' - Multi-Server install is used when you want to configure multiple servers now or" << endl; - cout << " in the future. With Multi-Server install, you can still configure just 1 server" << endl; - cout << " now and add on addition servers/modules in the future." << endl << endl; - - string temp; - try { - temp = sysConfig->getConfig(InstallSection, "SingleServerInstall"); - } - catch(...) - {} - - if ( temp == "y" ) + cout << "Single-Server install is used when there will only be 1 server configured" << endl; + cout << "on the system. It can also be used for production systems, if the plan is" << endl; + cout << "to stay single-server." 
<< endl; + singleServerInstall = "1"; + } else - singleServerInstall = "2"; + { + cout << "===== Setup System Server Type Configuration =====" << endl << endl; + + cout << "There are 2 options when configuring the System Server Type: single and multi" << endl << endl; + cout << " 'single' - Single-Server install is used when there will only be 1 server configured" << endl; + cout << " on the system. It can also be used for production systems, if the plan is" << endl; + cout << " to stay single-server." << endl << endl; + cout << " 'multi' - Multi-Server install is used when you want to configure multiple servers now or" << endl; + cout << " in the future. With Multi-Server install, you can still configure just 1 server" << endl; + cout << " now and add on addition servers/modules in the future." << endl << endl; - while(true) { - prompt = "Select the type of System Server install [1=single, 2=multi] (" + singleServerInstall + ") > "; - pcommand = callReadline(prompt.c_str()); string temp; - if (pcommand) { - if (strlen(pcommand) > 0) - temp = pcommand; - else - temp = singleServerInstall; - callFree(pcommand); - if (temp == "1") { - singleServerInstall = temp; - cout << endl << "Performing the Single Server Install." << endl; + try { + temp = sysConfig->getConfig(InstallSection, "SingleServerInstall"); + } + catch(...) + {} - if ( reuseConfig == "n" ) { - //setup to use the single server Columnstore.xml file + if ( temp == "y" ) + singleServerInstall = "1"; + else + singleServerInstall = "2"; - // we know that our Config instance just timestamped itself in the getConfig - // call above. 
if postConfigure is running non-interactively we may get here - // within the same second which means the changes that are about to happen - // when Columnstore.xml gets overwritten will be ignored because of the Config - // instance won't know to reload - sleep(2); + while(true) { + string temp = singleServerInstall; + prompt = "Select the type of System Server install [1=single, 2=multi] (" + singleServerInstall + ") > "; + pcommand = callReadline(prompt.c_str()); + if (pcommand) { + if (strlen(pcommand) > 0) + temp = pcommand; + else + temp = singleServerInstall; + + callFree(pcommand); - cmd = "rm -f " + installDir + "/etc/Columnstore.xml.installSave > /dev/null 2>&1"; - system(cmd.c_str()); - cmd = "mv -f " + installDir + "/etc/Columnstore.xml " + installDir + "/etc/Columnstore.xml.installSave > /dev/null 2>&1"; - system(cmd.c_str()); - cmd = "/bin/cp -f " + installDir + "/etc/Columnstore.xml.singleserver " + installDir + "/etc/Columnstore.xml > /dev/null 2>&1"; - system(cmd.c_str()); - } - - setSystemName(); - cout << endl; - - system(cmd.c_str()); - - // setup storage - if (!storageSetup(false)) - { - cout << "ERROR: Problem setting up storage" << endl; - exit(1); - } - - if (hdfs || !rootUser) - if( !updateBash() ) - cout << "updateBash error" << endl; - - // setup storage - if (!singleServerDBrootSetup()) - { - cout << "ERROR: Problem setting up DBRoot IDs" << endl; - exit(1); - } - - //set system DBRoot count and check 'files per parition' with number of dbroots - try { - sysConfig->setConfig(SystemSection, "DBRootCount", oam.itoa(DBRootCount)); - } - catch(...) 
- { - cout << "ERROR: Problem setting DBRoot Count in the MariaDB ColumnStore System Configuration file" << endl; - exit(1); - } - - //check if dbrm data resides in older directory path and inform user if it does - dbrmDirCheck(); - - if (startOfflinePrompt) - offLineAppCheck(); - - checkMysqlPort(mysqlPort, sysConfig); - - if ( !writeConfig(sysConfig) ) { - cout << "ERROR: Failed trying to update MariaDB ColumnStore System Configuration file" << endl; - exit(1); - } - - cout << endl << "===== Performing Configuration Setup and MariaDB ColumnStore Startup =====" << endl; - - cmd = installDir + "/bin/installer dummy.rpm dummy.rpm dummy.rpm dummy.rpm dummy.rpm initial dummy " + reuseConfig + " --nodeps ' ' 1 " + installDir; - system(cmd.c_str()); - exit(0); - } - else - { - if (temp == "2") { + if (temp == "1") { singleServerInstall = temp; + + cout << endl << "Performing the Single Server Install." << endl; + break; } - } - cout << "Invalid Entry, please re-enter" << endl; - if ( noPrompting ) - exit(1); + else + { + if (temp == "2") { + singleServerInstall = temp; + break; + } + } - continue; + cout << "Invalid Entry, please re-enter (1 or 2)" << endl; + if ( noPrompting ) + exit(1); + } } - break; + } + + if (singleServerInstall == "1") + { + if ( reuseConfig == "n" ) { + //setup to use the single server Columnstore.xml file + + // we know that our Config instance just timestamped itself in the getConfig + // call above. 
if postConfigure is running non-interactively we may get here + // within the same second which means the changes that are about to happen + // when Columnstore.xml gets overwritten will be ignored because of the Config + // instance won't know to reload + sleep(1); + + cmd = "rm -f " + installDir + "/etc/Columnstore.xml.installSave > /dev/null 2>&1"; + system(cmd.c_str()); + cmd = "mv -f " + installDir + "/etc/Columnstore.xml " + installDir + "/etc/Columnstore.xml.installSave > /dev/null 2>&1"; + system(cmd.c_str()); + cmd = "/bin/cp -f " + installDir + "/etc/Columnstore.xml.singleserver " + installDir + "/etc/Columnstore.xml > /dev/null 2>&1"; + system(cmd.c_str()); + } + + setSystemName(single_server_quick_install); + + if (!single_server_quick_install) + { + cout << endl; + + // setup storage + if (!storageSetup(false)) + { + cout << "ERROR: Problem setting up storage" << endl; + exit(1); + } + + // setup storage + if (!singleServerDBrootSetup()) + { + cout << "ERROR: Problem setting up DBRoot IDs" << endl; + exit(1); + } + + //set system DBRoot count and check 'files per parition' with number of dbroots + try { + sysConfig->setConfig(SystemSection, "DBRootCount", oam.itoa(DBRootCount)); + } + catch(...) 
+ { + cout << "ERROR: Problem setting DBRoot Count in the MariaDB ColumnStore System Configuration file" << endl; + exit(1); + } + + //check if dbrm data resides in older directory path and inform user if it does + dbrmDirCheck(); + + if (startOfflinePrompt) + offLineAppCheck(); + } + + checkMysqlPort(mysqlPort, sysConfig); + + if (hdfs || !rootUser) + if( !updateBash() ) + cout << "updateBash error" << endl; + + if ( !writeConfig(sysConfig) ) { + cout << "ERROR: Failed trying to update MariaDB ColumnStore System Configuration file" << endl; + exit(1); + } + + cout << endl << "===== Performing Configuration Setup and MariaDB ColumnStore Startup =====" << endl; + + cmd = installDir + "/bin/installer dummy.rpm dummy.rpm dummy.rpm dummy.rpm dummy.rpm initial dummy " + reuseConfig + " --nodeps ' ' 1 " + installDir; + system(cmd.c_str()); + exit(0); } try { @@ -4796,29 +4837,36 @@ bool storageSetup(bool amazonInstall) } -void setSystemName() +void setSystemName(bool single_server_quick_install) { Oam oam; //setup System Name - try { - systemName = sysConfig->getConfig(SystemSection, "SystemName"); - } - catch(...) - { - systemName = oam::UnassignedName; - } - + if ( systemName.empty() ) - systemName = oam::UnassignedName; - - prompt = "Enter System Name (" + systemName + ") > "; - pcommand = callReadline(prompt.c_str()); - if (pcommand) { - if (strlen(pcommand) > 0) systemName = pcommand; - callFree(pcommand); + try { + systemName = sysConfig->getConfig(SystemSection, "SystemName"); + } + catch(...) 
+ { + systemName = oam::UnassignedName; + } } + + if ( systemName.empty() ) + systemName = "columnstore-1"; + if (!single_server_quick_install) + { + prompt = "Enter System Name (" + systemName + ") > "; + pcommand = callReadline(prompt.c_str()); + if (pcommand) + { + if (strlen(pcommand) > 0) systemName = pcommand; + callFree(pcommand); + } + } + try { sysConfig->setConfig(SystemSection, "SystemName", systemName); oam.changeMyCnf( "server_audit_syslog_info", systemName ); From 37a159064b517266901c9c80f2010d89cf0d5909 Mon Sep 17 00:00:00 2001 From: David Hill Date: Thu, 5 Jul 2018 15:21:33 -0500 Subject: [PATCH 072/123] added quick install script --- oamapps/postConfigure/postConfigure.cpp | 8 +++---- .../quick_installer_single_server.sh | 22 +++++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 oamapps/postConfigure/quick_installer_single_server.sh diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index cd17b880c..e7670ce99 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -289,20 +289,20 @@ int main(int argc, char *argv[]) cout << " Enter one of the options within [], if available, or" << endl; cout << " Enter a new value" << endl << endl; cout << endl; - cout << "Usage: postConfigure [-h][-c][-u][-p][-sq][-port][-i][-n][-sn]" << endl; + cout << "Usage: postConfigure [-h][-c][-u][-p][-qs][-port][-i][-n][-sn]" << endl; cout << " -h Help" << endl; cout << " -c Config File to use to extract configuration data, default is Columnstore.xml.rpmsave" << endl; cout << " -u Upgrade, Install using the Config File from -c, default to Columnstore.xml.rpmsave" << endl; cout << " If ssh-keys aren't setup, you should provide passwords as command line arguments" << endl; cout << " -p Unix Password, used with no-prompting option" << endl; - cout << " -sq Single Server Quick Install" << endl; + cout << " -qs Single Server Quick Install" << endl; cout 
<< " -port MariaDB ColumnStore Port Address" << endl; cout << " -i Non-root Install directory, Only use for non-root installs" << endl; cout << " -n Non-distributed install, meaning it will not install the remote nodes" << endl; cout << " -sn System Name" << endl; exit (0); } - else if( string("-sq") == argv[i] ) + else if( string("-qs") == argv[i] ) { single_server_quick_install = true; noPrompting = true; @@ -372,7 +372,7 @@ int main(int argc, char *argv[]) else { cout << " ERROR: Invalid Argument = " << argv[i] << endl; - cout << " Usage: postConfigure [-h][-c][-u][-p][-s][-port][-i]" << endl; + cout << " Usage: postConfigure [-h][-c][-u][-p][-qs][-port][-i][-n][-sn]" << endl; exit (1); } } diff --git a/oamapps/postConfigure/quick_installer_single_server.sh b/oamapps/postConfigure/quick_installer_single_server.sh new file mode 100644 index 000000000..dfbc2d06c --- /dev/null +++ b/oamapps/postConfigure/quick_installer_single_server.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# +# $Id: quick_installer_single_server.sh 3705 2018-07-07 19:47:20Z dhill $ +# +# Poddst- Quick Installer for Single Server MariaDB Columnstore + + +if [ $HOME == "root" ]; then + echo "Run post-install script" + echo "" + /usr/local/mariadb/columnstore/bin/post-install + echo "Run postConfigure script" + echo "" + /usr/local/mariadb/columnstore/bin/postConfigure -qs +else + echo "Run post-install script" + echo "" + $HOME/mariadb/columnstore/bin/post-install --installdir=$HOME/mariadb/columnstore + echo "Run postConfigure script" + echo "" + $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -sq +fi From 2c533c000219308bd98fcd6471a58670eaf9558f Mon Sep 17 00:00:00 2001 From: David Hill Date: Thu, 5 Jul 2018 16:40:36 -0500 Subject: [PATCH 073/123] add quick install script --- oamapps/postConfigure/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/oamapps/postConfigure/CMakeLists.txt b/oamapps/postConfigure/CMakeLists.txt index ebe7d7f3f..60f761e25 100644 --- 
a/oamapps/postConfigure/CMakeLists.txt +++ b/oamapps/postConfigure/CMakeLists.txt @@ -56,3 +56,8 @@ target_link_libraries(mycnfUpgrade ${ENGINE_LDFLAGS} readline ncurses ${MARIADB_ install(TARGETS mycnfUpgrade DESTINATION ${ENGINE_BINDIR} COMPONENT platform) + +########### next target ############### + +install(PROGRAMS quick_installer_single_server.sh + DESTINATION ${ENGINE_BINDIR} COMPONENT platform) From f0ed78e92b6e9ac0ffa0a9fcccb68bd7a26a2d09 Mon Sep 17 00:00:00 2001 From: David Hill Date: Fri, 6 Jul 2018 08:43:58 -0500 Subject: [PATCH 074/123] fix root install path --- oamapps/postConfigure/quick_installer_single_server.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oamapps/postConfigure/quick_installer_single_server.sh b/oamapps/postConfigure/quick_installer_single_server.sh index dfbc2d06c..3cf1dac50 100644 --- a/oamapps/postConfigure/quick_installer_single_server.sh +++ b/oamapps/postConfigure/quick_installer_single_server.sh @@ -5,7 +5,7 @@ # Poddst- Quick Installer for Single Server MariaDB Columnstore -if [ $HOME == "root" ]; then +if [ $HOME == "/root" ]; then echo "Run post-install script" echo "" /usr/local/mariadb/columnstore/bin/post-install From 4f659108f096e2b9077f61e0ac04fd3c3e73dd68 Mon Sep 17 00:00:00 2001 From: David Hill Date: Fri, 6 Jul 2018 16:39:58 -0500 Subject: [PATCH 075/123] MCOL-1146 - multi-node quick install --- oam/etc/Columnstore.xml | 2 +- oamapps/postConfigure/CMakeLists.txt | 2 +- oamapps/postConfigure/postConfigure.cpp | 258 ++++++++++++++---- .../quick_installer_multi_server.sh | 63 +++++ .../quick_installer_single_server.sh | 13 +- 5 files changed, 285 insertions(+), 53 deletions(-) create mode 100644 oamapps/postConfigure/quick_installer_multi_server.sh diff --git a/oam/etc/Columnstore.xml b/oam/etc/Columnstore.xml index ec985028f..d083cf60c 100644 --- a/oam/etc/Columnstore.xml +++ b/oam/etc/Columnstore.xml @@ -438,7 +438,7 @@ n n n - 2 + 1 n n internal diff --git 
a/oamapps/postConfigure/CMakeLists.txt b/oamapps/postConfigure/CMakeLists.txt index 60f761e25..cfe3d700e 100644 --- a/oamapps/postConfigure/CMakeLists.txt +++ b/oamapps/postConfigure/CMakeLists.txt @@ -59,5 +59,5 @@ install(TARGETS mycnfUpgrade DESTINATION ${ENGINE_BINDIR} COMPONENT platform) ########### next target ############### -install(PROGRAMS quick_installer_single_server.sh +install(PROGRAMS quick_installer_single_server.sh quick_installer_multi_server.sh DESTINATION ${ENGINE_BINDIR} COMPONENT platform) diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index e7670ce99..08fa549a8 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -99,6 +99,14 @@ typedef struct Performance_Module_struct typedef std::vector PerformanceModuleList; +typedef struct ModuleIP_struct +{ + std::string IPaddress; + std::string moduleName; +} ModuleIP; + +typedef std::vector ModuleIpList; + void offLineAppCheck(); bool setOSFiles(string parentOAMModuleName, int serverTypeInstall); bool checkSaveConfigFile(); @@ -112,7 +120,7 @@ bool makeRClocal(string moduleType, string moduleName, int IserverTypeInstall); bool createDbrootDirs(string DBRootStorageType); bool pkgCheck(std::string columnstorePackage); bool storageSetup(bool amazonInstall); -void setSystemName(bool single_server_quick_install = false); +void setSystemName(); bool singleServerDBrootSetup(); bool copyFstab(string moduleName); bool attachVolume(string instanceName, string volumeName, string deviceName, string dbrootPath); @@ -121,12 +129,6 @@ void remoteInstallThread(void *); bool glusterSetup(string password); -typedef struct ModuleIP_struct -{ - std::string IPaddress; - std::string moduleName; -} ModuleIP; - std::string launchInstance(ModuleIP moduleip); string columnstorePackage; @@ -150,7 +152,6 @@ string PMVolumeType = "standard"; string PMVolumeIOPS = oam::UnassignedName; string UMVolumeIOPS = oam::UnassignedName; - int 
DBRootCount; string deviceName; @@ -185,6 +186,8 @@ string MySQLRep = "y"; string PMwithUM = "n"; bool amazonInstall = false; bool nonDistribute = false; +bool single_server_quick_install = false; +bool multi_server_quick_install = false; string DataFileEnvFile; @@ -219,8 +222,9 @@ int main(int argc, char *argv[]) noPrompting = false; string password; string cmd; - bool single_server_quick_install = false; - + string pmIpAddrs = ""; + string umIpAddrs = ""; + // struct sysinfo myinfo; // hidden options @@ -272,10 +276,12 @@ int main(int argc, char *argv[]) if (p && *p) HOME = p; } + for( int i = 1; i < argc; i++ ) { - if( string("-h") == argv[i] ) { + if( string("-h") == argv[i] ) + { cout << endl; cout << "This is the MariaDB ColumnStore System Configuration and Installation tool." << endl; cout << "It will Configure the MariaDB ColumnStore System based on Operator inputs and" << endl; @@ -289,48 +295,62 @@ int main(int argc, char *argv[]) cout << " Enter one of the options within [], if available, or" << endl; cout << " Enter a new value" << endl << endl; cout << endl; - cout << "Usage: postConfigure [-h][-c][-u][-p][-qs][-port][-i][-n][-sn]" << endl; + cout << "Usage: postConfigure [-h][-c][-u][-p][-qs][-qm][-port][-i][-n][-sn][-pm-ip-addrs][-um-ip-addrs]" << endl; cout << " -h Help" << endl; cout << " -c Config File to use to extract configuration data, default is Columnstore.xml.rpmsave" << endl; cout << " -u Upgrade, Install using the Config File from -c, default to Columnstore.xml.rpmsave" << endl; cout << " If ssh-keys aren't setup, you should provide passwords as command line arguments" << endl; cout << " -p Unix Password, used with no-prompting option" << endl; - cout << " -qs Single Server Quick Install" << endl; + cout << " -qs Quick Install - Single Server" << endl; + cout << " -qm Quick Install - Multi Server" << endl; cout << " -port MariaDB ColumnStore Port Address" << endl; cout << " -i Non-root Install directory, Only use for non-root installs" << 
endl; cout << " -n Non-distributed install, meaning it will not install the remote nodes" << endl; cout << " -sn System Name" << endl; + cout << " -pm-ip-addrs Performance Module IP Addresses xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx" << endl; + cout << " -um-ip-addrs User Module IP Addresses xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx" << endl; exit (0); } - else if( string("-qs") == argv[i] ) - { - single_server_quick_install = true; - noPrompting = true; - } - else if( string("-f") == argv[i] ) - nodeps = "--nodeps"; - else if( string("-o") == argv[i] ) - startOfflinePrompt = true; - else if( string("-c") == argv[i] ) { + else if( string("-qs") == argv[i] ) + { + single_server_quick_install = true; + noPrompting = true; + } + else if( string("-qm") == argv[i] ) + { + multi_server_quick_install = true; + noPrompting = true; + } + else if( string("-f") == argv[i] ) + nodeps = "--nodeps"; + else if( string("-o") == argv[i] ) + startOfflinePrompt = true; + else if( string("-c") == argv[i] ) + { i++; - if (i >= argc ) { + if (i >= argc ) + { cout << " ERROR: Config File not provided" << endl; exit (1); } oldFileName = argv[i]; - if ( oldFileName.find("Columnstore.xml") == string::npos ) { + if ( oldFileName.find("Columnstore.xml") == string::npos ) + { cout << " ERROR: Config File is not a Columnstore.xml file name" << endl; exit (1); } } - else if( string("-p") == argv[i] ) { + else if( string("-p") == argv[i] ) + { i++; - if (i >= argc ) { + if (i >= argc ) + { cout << " ERROR: Password not provided" << endl; exit (1); } password = argv[i]; - if ( password.find("-") != string::npos ) { + if ( password.find("-") != string::npos ) + { cout << " ERROR: Valid Password not provided" << endl; exit (1); } @@ -340,9 +360,11 @@ int main(int argc, char *argv[]) // for backward compatibility else if( string("-n") == argv[i] ) nonDistribute = true; - else if( string("-port") == argv[i] ) { + else if( string("-port") == argv[i] ) + { i++; - if (i >= argc ) { + if (i >= argc ) + { cout << " ERROR: 
MariaDB ColumnStore Port ID not supplied" << endl; exit (1); } @@ -353,29 +375,64 @@ int main(int argc, char *argv[]) exit (1); } } - else if( string("-i") == argv[i] ) { + else if( string("-i") == argv[i] ) + { i++; - if (i >= argc ) { + if (i >= argc ) + { cout << " ERROR: Path not provided" << endl; exit (1); } installDir = argv[i]; } - else if( string("-sn") == argv[i] ) { + else if( string("-sn") == argv[i] ) + { i++; - if (i >= argc ) { + if (i >= argc ) + { cout << " ERROR: System-name not provided" << endl; exit (1); } systemName = argv[i]; + } + else if( string("-pm-ip-addrs") == argv[i] ) + { + i++; + if (i >= argc ) + { + cout << " ERROR: PM-IP-ADRESSES not provided" << endl; + exit (1); + } + pmIpAddrs = argv[i]; + } + else if( string("-um-ip-addrs") == argv[i] ) + { + i++; + if (i >= argc ) + { + cout << " ERROR: UM-IP-ADRESSES not provided" << endl; + exit (1); + } + umIpAddrs = argv[i]; } else { cout << " ERROR: Invalid Argument = " << argv[i] << endl; - cout << " Usage: postConfigure [-h][-c][-u][-p][-qs][-port][-i][-n][-sn]" << endl; + cout << " Usage: postConfigure [-h][-c][-u][-p][-qs][-qm][-port][-i][-n][-sn][-pm-ip-addrs][-um-ip-addrs]" << endl; exit (1); } } + + //check if quick install multi-server has been given ip address + if (multi_server_quick_install) + { + if ( umIpAddrs.empty() && pmIpAddrs.empty() || + !umIpAddrs.empty() && pmIpAddrs.empty() ) + { + cout << " ERROR: Multi-Server option entered, but invalid UM/PM IP addresses were provided, exiting" << endl; + exit(1); + } + } if (installDir[0] != '/') { @@ -395,7 +452,7 @@ int main(int argc, char *argv[]) cout << "IMPORTANT: This tool requires to run on the Performance Module #1" << endl; cout << endl; - if (!single_server_quick_install) + if (!single_server_quick_install || !multi_server_quick_install) { if (!noPrompting) { cout << "Prompting instructions:" << endl << endl; @@ -436,7 +493,7 @@ int main(int argc, char *argv[]) } //check Config saved files - if 
(!single_server_quick_install) + if (!single_server_quick_install || !multi_server_quick_install) { if ( !checkSaveConfigFile()) { @@ -448,7 +505,7 @@ int main(int argc, char *argv[]) //determine package type string EEPackageType; - if (single_server_quick_install) + if (single_server_quick_install || multi_server_quick_install) { if (!rootUser) EEPackageType = "binary"; @@ -638,7 +695,7 @@ int main(int argc, char *argv[]) if (single_server_quick_install) { - cout << "===== Quick Single-Server Install Configuration =====" << endl << endl; + cout << "===== Quick Install Single-Server Configuration =====" << endl << endl; cout << "Single-Server install is used when there will only be 1 server configured" << endl; cout << "on the system. It can also be used for production systems, if the plan is" << endl; @@ -646,6 +703,12 @@ int main(int argc, char *argv[]) singleServerInstall = "1"; } + else if (multi_server_quick_install) + { + cout << "===== Quick Install Multi-Server Configuration =====" << endl << endl; + + singleServerInstall = "2"; + } else { cout << "===== Setup System Server Type Configuration =====" << endl << endl; @@ -724,7 +787,7 @@ int main(int argc, char *argv[]) system(cmd.c_str()); } - setSystemName(single_server_quick_install); + setSystemName(); if (!single_server_quick_install) { @@ -796,6 +859,76 @@ int main(int argc, char *argv[]) // // Multi-server install // + + ModuleIP InputModuleIP; + ModuleIpList InputModuleIPList; + + int MaxNicID = oam::MAX_NIC; + + if (multi_server_quick_install) + { + //set configuarion settings for default setup + try { + sysConfig->setConfig(InstallSection, "MySQLRep", "y"); + } + catch(...) + {} + + if (umIpAddrs == "" ) + { + // set Server Type Installation to combined + try { + sysConfig->setConfig(InstallSection, "ServerTypeInstall", "2"); + } + catch(...) 
+ {} + } + else + { + int id = 1; + boost::char_separator sep(","); + boost::tokenizer< boost::char_separator > tokens(umIpAddrs, sep); + for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); + it != tokens.end(); + ++it, ++id) + { + string module = "um" + oam.itoa(id); + + InputModuleIP.IPaddress = *it; + InputModuleIP.moduleName = module; + InputModuleIPList.push_back(InputModuleIP); + } + + umNumber = id; + } + + if (pmIpAddrs != "" ) + { + int id = 1; + boost::char_separator sep(","); + boost::tokenizer< boost::char_separator > tokens(pmIpAddrs, sep); + for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); + it != tokens.end(); + ++it, ++id) + { + string module = "pm" + oam.itoa(id); + + InputModuleIP.IPaddress = *it; + InputModuleIP.moduleName = module; + InputModuleIPList.push_back(InputModuleIP); + } + + pmNumber = id; + } + + if ( !writeConfig(sysConfig) ) + { + cout << "ERROR: Failed trying to update MariaDB ColumnStore System Configuration file" << endl; + exit(1); + } + + MaxNicID = 1; + } cout << endl; //cleanup/create local/etc directory @@ -851,7 +984,7 @@ int main(int argc, char *argv[]) } switch ( IserverTypeInstall ) { - case (oam::INSTALL_COMBINE_DM_UM_PM): // combined #1 - dm/um/pm on a single server + case (oam::INSTALL_COMBINE_DM_UM_PM): // combined #1 - um/pm on a single server { cout << "Combined Server Installation will be performed." << endl; cout << "The Server will be configured as a Performance Module." << endl; @@ -986,8 +1119,8 @@ int main(int argc, char *argv[]) } if ( answer == "y" ) { - mysqlRep = true; - MySQLRep = "y"; + mysqlRep = true; + MySQLRep = "y"; } else { @@ -1463,6 +1596,14 @@ int main(int argc, char *argv[]) catch(...) 
{} } + + if ( moduleType == "um") + if ( umNumber != 0 ) + moduleCount = umNumber; + + if ( moduleType == "pm") + if ( pmNumber != 0 ) + moduleCount = pmNumber; //verify and setup of modules count switch ( IserverTypeInstall ) { @@ -1545,7 +1686,7 @@ int main(int argc, char *argv[]) //clear any Equipped Module IP addresses that aren't in current ID range for ( int j = 0 ; j < listSize ; j++ ) { - for ( unsigned int k = 1 ; k < MAX_NIC+1 ; k++) + for ( unsigned int k = 1 ; k < MaxNicID+1 ; k++) { string ModuleIPAddr = "ModuleIPAddr" + oam.itoa(j+1) + "-" + oam.itoa(k) + "-" + oam.itoa(i+1); if ( !(sysConfig->getConfig(ModuleSection, ModuleIPAddr).empty()) ) { @@ -1594,7 +1735,8 @@ int main(int argc, char *argv[]) moduleDisableState = oam::ENABLEDSTATE; //setup HostName/IPAddress for each NIC - for( unsigned int nicID=1 ; nicID < MAX_NIC+1 ; nicID++ ) + + for( unsigned int nicID=1 ; nicID < MaxNicID +1 ; nicID++ ) { string moduleHostName = oam::UnassignedName; string moduleIPAddr = oam::UnassignedIpAddr; @@ -1615,14 +1757,30 @@ int main(int argc, char *argv[]) for( ; pt1 != (*listPT).hostConfigList.end() ; pt1++) { if ((*pt1).NicID == nicID) { - moduleHostName = (*pt1).HostName; - moduleIPAddr = (*pt1).IPAddr; + bool found = false; + ModuleIpList::iterator pt2 = InputModuleIPList.begin(); + for( ; pt2 != InputModuleIPList.end() ; pt2++) + { + if ( (*pt2).moduleName == newModuleName ) + { + moduleHostName = (*pt2).IPaddress; + moduleIPAddr = (*pt2).IPaddress; + found = true; + } + } + + if ( !found ) + { + moduleHostName = (*pt1).HostName; + moduleIPAddr = (*pt1).IPAddr; + } break; } } } } - + + if ( nicID == 1 ) { if ( moduleDisableState != oam::ENABLEDSTATE ) { string disabled = "y"; @@ -4837,7 +4995,7 @@ bool storageSetup(bool amazonInstall) } -void setSystemName(bool single_server_quick_install) +void setSystemName() { Oam oam; //setup System Name @@ -4856,7 +5014,7 @@ void setSystemName(bool single_server_quick_install) if ( systemName.empty() ) systemName = 
"columnstore-1"; - if (!single_server_quick_install) + if (!single_server_quick_install || !multi_server_quick_install) { prompt = "Enter System Name (" + systemName + ") > "; pcommand = callReadline(prompt.c_str()); diff --git a/oamapps/postConfigure/quick_installer_multi_server.sh b/oamapps/postConfigure/quick_installer_multi_server.sh new file mode 100644 index 000000000..25729ec7c --- /dev/null +++ b/oamapps/postConfigure/quick_installer_multi_server.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# +# $Id: quick_installer_multi_server.sh 3705 2018-07-07 19:47:20Z dhill $ +# +# Poddst- Quick Installer for Multi Server MariaDB Columnstore + +pmIpAddrs="" +umIpAddrs="" + +for arg in "$@"; do + if [ `expr -- "$arg" : '--pm-ip-addresses='` -eq 18 ]; then + pmIpAddrs="`echo $arg | awk -F= '{print $2}'`" + elif [ `expr -- "$arg" : '--pm-ip-addresses='` -eq 18 ]; then + umIpAddrs="`echo $arg | awk -F= '{print $2}'`" + elif [ `expr -- "$arg" : '--help'` -eq 6 ]; then + echo "Usage ./quick_installer_multi_server.sh [OPTION]" + echo "" + echo "Quick Installer for a Multi Server MariaDB ColumnStore Install" + echo "" + echo "Performace Module (pm) IP addresses required" + echo "User Module (um) IP addresses option" + echo "When only pm IP addresses provided, system is combined setup" + echo "When both pm/um IP addresses provided, system is seperate setup" + echo + echo "--pm-ip-addresses=xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx" + echo "--um-ip-addresses=xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx" + echo "" + else + echo "quick_installer_multi_server.sh: ignoring unknown argument: $arg" 1>&2 + fi +done + +if [ $pmIpAddrs == "" ] ; then + echo "" + echo "Performace Module (pm) IP addresses required, exiting" + exit 1 +else + if [ $umIpAddrs == "" ] ; then + echo "" + echo "Performing a Multi-Server Combined install with um/pm running on some server" + echo"" + else + echo "" + echo "Performing a Multi-Server Seperate install with um and pm running on seperate servers" + echo"" + fi +fi + +if [ $HOME == 
"/root" ]; then + echo "Run post-install script" + echo "" + /usr/local/mariadb/columnstore/bin/post-install + echo "Run postConfigure script" + echo "" + /usr/local/mariadb/columnstore/bin/postConfigure -qm -pm-ip-addrs=$pmIpAddrs -um-ip-addrs=$umIpAddrs +else + echo "Run post-install script" + echo "" + $HOME/mariadb/columnstore/bin/post-install --installdir=$HOME/mariadb/columnstore + echo "Run postConfigure script" + echo "" + $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs=$pmIpAddrs -um-ip-addrs=$umIpAddrs +fi diff --git a/oamapps/postConfigure/quick_installer_single_server.sh b/oamapps/postConfigure/quick_installer_single_server.sh index 3cf1dac50..bfd9b81c4 100644 --- a/oamapps/postConfigure/quick_installer_single_server.sh +++ b/oamapps/postConfigure/quick_installer_single_server.sh @@ -4,6 +4,17 @@ # # Poddst- Quick Installer for Single Server MariaDB Columnstore +for arg in "$@"; do + if [ `expr -- "$arg" : '--help'` -eq 6 ]; then + echo "Usage ./quick_installer_multi_server.sh" + echo "" + echo "Quick Installer for a Single Server MariaDB ColumnStore Install" + echo "" + else + echo "quick_installer_multi_server.sh: ignoring unknown argument: $arg" 1>&2 + fi +done + if [ $HOME == "/root" ]; then echo "Run post-install script" @@ -18,5 +29,5 @@ else $HOME/mariadb/columnstore/bin/post-install --installdir=$HOME/mariadb/columnstore echo "Run postConfigure script" echo "" - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -sq + $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qs fi From 20c2cc87addfbc38eafca535705cc8c5361ce73f Mon Sep 17 00:00:00 2001 From: David Hill Date: Mon, 9 Jul 2018 09:42:54 -0500 Subject: [PATCH 076/123] changes for dbroot --- oamapps/postConfigure/postConfigure.cpp | 57 +++++++++++++++---------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp 
index 08fa549a8..20b9922cb 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -493,13 +493,10 @@ int main(int argc, char *argv[]) } //check Config saved files - if (!single_server_quick_install || !multi_server_quick_install) + if ( !checkSaveConfigFile()) { - if ( !checkSaveConfigFile()) - { - cout << "ERROR: Configuration File not setup" << endl; - exit(1); - } + cout << "ERROR: Configuration File not setup" << endl; + exit(1); } //determine package type @@ -2471,28 +2468,35 @@ int main(int argc, char *argv[]) string dbrootList; - for ( unsigned int id = 1 ; id < count+1 ; ) + if (multi_server_quick_install) { - string moduledbrootid = "ModuleDBRootID" + oam.itoa(moduleID) + "-" + oam.itoa(id) + "-" + oam.itoa(i+1); - try { - string dbrootid = sysConfig->getConfig(ModuleSection, moduledbrootid); + dbrootList = oam.itoa(moduleID); + } + else + { + for ( unsigned int id = 1 ; id < count+1 ; ) + { + string moduledbrootid = "ModuleDBRootID" + oam.itoa(moduleID) + "-" + oam.itoa(id) + "-" + oam.itoa(i+1); + try { + string dbrootid = sysConfig->getConfig(ModuleSection, moduledbrootid); - if ( dbrootid != oam::UnassignedName) { - sysConfig->setConfig(ModuleSection, moduledbrootid, oam::UnassignedName); + if ( dbrootid != oam::UnassignedName) { + sysConfig->setConfig(ModuleSection, moduledbrootid, oam::UnassignedName); - dbrootList = dbrootList + dbrootid; - id ++; - if ( id < count+1 ) - dbrootList = dbrootList + ","; + dbrootList = dbrootList + dbrootid; + id ++; + if ( id < count+1 ) + dbrootList = dbrootList + ","; + } + } + catch(...) + { + cout << "ERROR: Problem setting DBRoot ID in the MariaDB ColumnStore System Configuration file" << endl; + exit(1); } } - catch(...) 
- { - cout << "ERROR: Problem setting DBRoot ID in the MariaDB ColumnStore System Configuration file" << endl; - exit(1); - } } - + vector dbroots; string tempdbrootList; @@ -3581,6 +3585,15 @@ bool checkSaveConfigFile() } return true; } + else + { + if (single_server_quick_install || multi_server_quick_install) + { + cout << endl << "Quick Install is for fresh installs only, '" + oldFileName + "' exist, exiting" << endl; + exit(1); + } + } + File.close(); // If 'oldFileName' isn't configured, exit From 80fc93c9df00a622901391d4fdf3be339e5b24d1 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Mon, 9 Jul 2018 18:36:28 +0300 Subject: [PATCH 077/123] MCOL-1510 GROUP BY supports aggregates as arguments in numerical functions. --- dbcon/mysql/ha_calpont_execplan.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index b06c197aa..a7c9cd7bf 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -3412,6 +3412,13 @@ ReturnedColumn* buildFunctionColumn( ReturnedColumn* rc = buildReturnedColumn(ifp->arguments()[i], gwi, nonSupport); + // MCOL-1510 It must be a temp table field, so find the corresponding column. 
+ if (pushdownHand + && ifp->arguments()[i]->type() == Item::REF_ITEM) + { + rc = findCorrespTempField(static_cast(ifp->arguments()[i]), gwi); + } + if (!rc || nonSupport) { nonSupport = true; From 11d20940897835e49a3ba310a24dc5fc933ceaf3 Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 10 Jul 2018 14:03:17 -0500 Subject: [PATCH 078/123] MCOL-1531 Don't compare alias in == operators for ReturnedColumn types --- dbcon/execplan/arithmeticcolumn.cpp | 8 ++++---- dbcon/execplan/constantcolumn.cpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbcon/execplan/arithmeticcolumn.cpp b/dbcon/execplan/arithmeticcolumn.cpp index 5c12f7e5f..2c2eb5ace 100644 --- a/dbcon/execplan/arithmeticcolumn.cpp +++ b/dbcon/execplan/arithmeticcolumn.cpp @@ -345,10 +345,10 @@ bool ArithmeticColumn::operator==(const ArithmeticColumn& t) const } else if (fExpression != NULL || t.fExpression != NULL) return false; - if (fAlias != t.fAlias) - return false; - if (fTableAlias != t.fTableAlias) - return false; +// if (fAlias != t.fAlias) +// return false; +// if (fTableAlias != t.fTableAlias) +// return false; if (fData != t.fData) return false; return true; diff --git a/dbcon/execplan/constantcolumn.cpp b/dbcon/execplan/constantcolumn.cpp index fce3825b8..fadd2dfc9 100644 --- a/dbcon/execplan/constantcolumn.cpp +++ b/dbcon/execplan/constantcolumn.cpp @@ -316,8 +316,8 @@ bool ConstantColumn::operator==(const ConstantColumn& t) const return false; if (fType != t.fType) return false; - if (fAlias != t.fAlias) - return false; +// if (fAlias != t.fAlias) +// return false; if (fData != t.fData) return false; if (fReturnAll != t.fReturnAll) From 68c01a3d127c54a9b285d6b9ce95ac6ebe145228 Mon Sep 17 00:00:00 2001 From: David Hill Date: Tue, 10 Jul 2018 14:27:28 -0500 Subject: [PATCH 079/123] fixes for multi install --- oamapps/postConfigure/postConfigure.cpp | 176 +++++++++++------- .../quick_installer_multi_server.sh | 41 ++-- 2 files changed, 133 insertions(+), 84 deletions(-) 
diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 20b9922cb..5e3fdd964 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -186,6 +186,7 @@ string MySQLRep = "y"; string PMwithUM = "n"; bool amazonInstall = false; bool nonDistribute = false; +bool nonDistributeFlag = false; bool single_server_quick_install = false; bool multi_server_quick_install = false; @@ -295,7 +296,7 @@ int main(int argc, char *argv[]) cout << " Enter one of the options within [], if available, or" << endl; cout << " Enter a new value" << endl << endl; cout << endl; - cout << "Usage: postConfigure [-h][-c][-u][-p][-qs][-qm][-port][-i][-n][-sn][-pm-ip-addrs][-um-ip-addrs]" << endl; + cout << "Usage: postConfigure [-h][-c][-u][-p][-qs][-qm][-port][-i][-n][-d][-sn][-pm-ip-addrs][-um-ip-addrs]" << endl; cout << " -h Help" << endl; cout << " -c Config File to use to extract configuration data, default is Columnstore.xml.rpmsave" << endl; cout << " -u Upgrade, Install using the Config File from -c, default to Columnstore.xml.rpmsave" << endl; @@ -305,7 +306,8 @@ int main(int argc, char *argv[]) cout << " -qm Quick Install - Multi Server" << endl; cout << " -port MariaDB ColumnStore Port Address" << endl; cout << " -i Non-root Install directory, Only use for non-root installs" << endl; - cout << " -n Non-distributed install, meaning it will not install the remote nodes" << endl; + cout << " -n Non-distributed install, meaning postConfigure will not install packages on remote nodes" << endl; + cout << " -d Distributed install, meaning postConfigure will install packages on remote nodes" << endl; cout << " -sn System Name" << endl; cout << " -pm-ip-addrs Performance Module IP Addresses xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx" << endl; cout << " -um-ip-addrs User Module IP Addresses xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx" << endl; @@ -359,7 +361,15 @@ int main(int argc, char *argv[]) noPrompting = true; // for backward 
compatibility else if( string("-n") == argv[i] ) + { nonDistribute = true; + nonDistributeFlag = true; + } + else if( string("-d") == argv[i] ) + { + nonDistribute = false; + nonDistributeFlag = true; + } else if( string("-port") == argv[i] ) { i++; @@ -502,37 +512,34 @@ int main(int argc, char *argv[]) //determine package type string EEPackageType; - if (single_server_quick_install || multi_server_quick_install) + if (!rootUser) + EEPackageType = "binary"; + else { - if (!rootUser) - EEPackageType = "binary"; - else - { - int rtnCode = system("rpm -qi mariadb-columnstore-platform > /tmp/columnstore.txt 2>&1"); - if (WEXITSTATUS(rtnCode) == 0) - EEPackageType = "rpm"; - else { - rtnCode = system("dpkg -s mariadb-columnstore-platform > /tmp/columnstore.txt 2>&1"); - if (WEXITSTATUS(rtnCode) == 0) - EEPackageType = "deb"; - else - EEPackageType = "binary"; - } + int rtnCode = system("rpm -qi mariadb-columnstore-platform > /tmp/columnstore.txt 2>&1"); + if (WEXITSTATUS(rtnCode) == 0) + EEPackageType = "rpm"; + else { + rtnCode = system("dpkg -s mariadb-columnstore-platform > /tmp/columnstore.txt 2>&1"); + if (WEXITSTATUS(rtnCode) == 0) + EEPackageType = "deb"; + else + EEPackageType = "binary"; } + } - try { - sysConfig->setConfig(InstallSection, "EEPackageType", EEPackageType); - } - catch(...) - { - cout << "ERROR: Problem setting EEPackageType from the MariaDB ColumnStore System Configuration file" << endl; - exit(1); - } + try { + sysConfig->setConfig(InstallSection, "EEPackageType", EEPackageType); + } + catch(...) 
+ { + cout << "ERROR: Problem setting EEPackageType from the MariaDB ColumnStore System Configuration file" << endl; + exit(1); + } - if ( !writeConfig(sysConfig) ) { - cout << "ERROR: Failed trying to update MariaDB ColumnStore System Configuration file" << endl; - exit(1); - } + if ( !writeConfig(sysConfig) ) { + cout << "ERROR: Failed trying to update MariaDB ColumnStore System Configuration file" << endl; + exit(1); } //check for local ip address as pm1 @@ -668,12 +675,22 @@ int main(int argc, char *argv[]) } //check for non-Distributed Install - if ( nonDistribute ) + if ( nonDistributeFlag ) { - try { - oam.setSystemConfig("DistributedInstall", "n"); - } - catch(...) {} + if ( nonDistribute ) + { + try { + oam.setSystemConfig("DistributedInstall", "n"); + } + catch(...) {} + } + else + { + try { + oam.setSystemConfig("DistributedInstall", "y"); + } + catch(...) {} + } } else { @@ -896,7 +913,7 @@ int main(int argc, char *argv[]) InputModuleIPList.push_back(InputModuleIP); } - umNumber = id; + umNumber = id-1; } if (pmIpAddrs != "" ) @@ -915,7 +932,7 @@ int main(int argc, char *argv[]) InputModuleIPList.push_back(InputModuleIP); } - pmNumber = id; + pmNumber = id-1; } if ( !writeConfig(sysConfig) ) @@ -1733,52 +1750,64 @@ int main(int argc, char *argv[]) //setup HostName/IPAddress for each NIC - for( unsigned int nicID=1 ; nicID < MaxNicID +1 ; nicID++ ) + string moduleHostName = oam::UnassignedName; + string moduleIPAddr = oam::UnassignedIpAddr; + + bool found = false; + if (multi_server_quick_install) { - string moduleHostName = oam::UnassignedName; - string moduleIPAddr = oam::UnassignedIpAddr; - - DeviceNetworkList::iterator listPT = sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - for( ; listPT != sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; listPT++) + ModuleIpList::iterator pt2 = InputModuleIPList.begin(); + for( ; pt2 != InputModuleIPList.end() ; pt2++) { - if (newModuleName == (*listPT).DeviceName) { - if ( 
nicID == 1 ) { - moduleDisableState = (*listPT).DisableState; - if ( moduleDisableState.empty() || - moduleDisableState == oam::UnassignedName || - moduleDisableState == oam::AUTODISABLEDSTATE ) - moduleDisableState = oam::ENABLEDSTATE; - } - - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - for( ; pt1 != (*listPT).hostConfigList.end() ; pt1++) + if ( (*pt2).moduleName == newModuleName ) + { + moduleHostName = (*pt2).IPaddress; + moduleIPAddr = (*pt2).IPaddress; + found = true; + break; + } + } + } + + unsigned int nicID=1; + for( ; nicID < MaxNicID +1 ; nicID++ ) + { + if ( !found ) + { + moduleHostName = oam::UnassignedName; + moduleIPAddr = oam::UnassignedIpAddr; + + DeviceNetworkList::iterator listPT = sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); + for( ; listPT != sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; listPT++) + { + if (newModuleName == (*listPT).DeviceName) { - if ((*pt1).NicID == nicID) { - bool found = false; - ModuleIpList::iterator pt2 = InputModuleIPList.begin(); - for( ; pt2 != InputModuleIPList.end() ; pt2++) + if ( nicID == 1 ) + { + moduleDisableState = (*listPT).DisableState; + if ( moduleDisableState.empty() || + moduleDisableState == oam::UnassignedName || + moduleDisableState == oam::AUTODISABLEDSTATE ) + moduleDisableState = oam::ENABLEDSTATE; { - if ( (*pt2).moduleName == newModuleName ) + HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); + for( ; pt1 != (*listPT).hostConfigList.end() ; pt1++) { - moduleHostName = (*pt2).IPaddress; - moduleIPAddr = (*pt2).IPaddress; - found = true; + if ((*pt1).NicID == nicID) + { + moduleHostName = (*pt1).HostName; + moduleIPAddr = (*pt1).IPAddr; + break; + } } } - - if ( !found ) - { - moduleHostName = (*pt1).HostName; - moduleIPAddr = (*pt1).IPAddr; - } - break; } } } } - - - if ( nicID == 1 ) { + + if ( nicID == 1 ) + { if ( moduleDisableState != oam::ENABLEDSTATE ) { string disabled = "y"; while (true) @@ -1860,6 +1889,7 
@@ int main(int argc, char *argv[]) break; } + bool moduleHostNameFound = true; if (moduleHostName.empty()) { moduleHostNameFound = true; @@ -3579,6 +3609,10 @@ bool checkSaveConfigFile() //check if Columnstore.xml.rpmsave exist ifstream File (oldFileName.c_str()); if (!File) { + if (single_server_quick_install || multi_server_quick_install) + { + return true; + } if ( noPrompting ) { cout << endl << "Old Config File not found '" + oldFileName + "', exiting" << endl; exit(1); diff --git a/oamapps/postConfigure/quick_installer_multi_server.sh b/oamapps/postConfigure/quick_installer_multi_server.sh index 25729ec7c..97aa670bc 100644 --- a/oamapps/postConfigure/quick_installer_multi_server.sh +++ b/oamapps/postConfigure/quick_installer_multi_server.sh @@ -6,16 +6,21 @@ pmIpAddrs="" umIpAddrs="" +nonDistrubutedInstall="-n" for arg in "$@"; do if [ `expr -- "$arg" : '--pm-ip-addresses='` -eq 18 ]; then pmIpAddrs="`echo $arg | awk -F= '{print $2}'`" - elif [ `expr -- "$arg" : '--pm-ip-addresses='` -eq 18 ]; then + elif [ `expr -- "$arg" : '--um-ip-addresses='` -eq 18 ]; then umIpAddrs="`echo $arg | awk -F= '{print $2}'`" + elif [ `expr -- "$arg" : '--dist-install'` -eq 14 ]; then + nonDistrubutedInstall=" " elif [ `expr -- "$arg" : '--help'` -eq 6 ]; then echo "Usage ./quick_installer_multi_server.sh [OPTION]" echo "" echo "Quick Installer for a Multi Server MariaDB ColumnStore Install" + echo "Defaults to non-distrubuted install, meaning MariaDB Columnstore" + echo "needs to be preinstalled on all nodes in the system" echo "" echo "Performace Module (pm) IP addresses required" echo "User Module (um) IP addresses option" @@ -24,40 +29,50 @@ for arg in "$@"; do echo echo "--pm-ip-addresses=xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx" echo "--um-ip-addresses=xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx" + echo "--dist-install Use Distributed Install Option" echo "" else - echo "quick_installer_multi_server.sh: ignoring unknown argument: $arg" 1>&2 + echo "quick_installer_multi_server.sh: unknown 
argument: $arg, use --help for help" 1>&2 + exit 1 fi done -if [ $pmIpAddrs == "" ] ; then +if [[ $pmIpAddrs = "" ]]; then echo "" echo "Performace Module (pm) IP addresses required, exiting" exit 1 else - if [ $umIpAddrs == "" ] ; then + if [[ $umIpAddrs = "" ]]; then echo "" - echo "Performing a Multi-Server Combined install with um/pm running on some server" + echo "NOTE: Performing a Multi-Server Combined install with um/pm running on some server" echo"" else echo "" - echo "Performing a Multi-Server Seperate install with um and pm running on seperate servers" + echo "NOTE: Performing a Multi-Server Seperate install with um and pm running on seperate servers" echo"" fi fi -if [ $HOME == "/root" ]; then - echo "Run post-install script" +if [[ $HOME = "/root" ]]; then + echo "${bold}Run post-install script${normal}" echo "" /usr/local/mariadb/columnstore/bin/post-install - echo "Run postConfigure script" + echo "${bold}Run postConfigure script${normal}" echo "" - /usr/local/mariadb/columnstore/bin/postConfigure -qm -pm-ip-addrs=$pmIpAddrs -um-ip-addrs=$umIpAddrs + if [[ $umIpAddrs = "" ]]; then + /usr/local/mariadb/columnstore/bin/postConfigure -qm -pm-ip-addrs $pmIpAddrs $nonDistrubutedInstall + else + /usr/local/mariadb/columnstore/bin/postConfigure -qm -pm-ip-addrs $pmIpAddrs -um-ip-addrs $umIpAddrs $nonDistrubutedInstall + fi else - echo "Run post-install script" + echo "${bold}Run post-install script${normal}" echo "" $HOME/mariadb/columnstore/bin/post-install --installdir=$HOME/mariadb/columnstore - echo "Run postConfigure script" + echo "${bold}Run postConfigure script${normal}" echo "" - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs=$pmIpAddrs -um-ip-addrs=$umIpAddrs + if [[ $umIpAddrs = "" ]]; then + $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs $nonDistrubutedInstall + else + $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm 
-pm-ip-addrs $pmIpAddrs -um-ip-addrs $umIpAddrs $nonDistrubutedInstall + fi fi From 4f6949835d50a9f0b1e5e421832c61d50e522076 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Wed, 11 Jul 2018 15:32:27 +0100 Subject: [PATCH 080/123] MCOL-1037 Fix race condition in FIFO buffer The FIFO buffer could get data in next() whilst a data swap is happening due to a rare race condition. This patch adds mutexes around the parts that could race. The observed effect of this race was during a complex aggregate query the results would occasionally be incorrect. In addition this fixes a race condition in PrimProc's regex processor. --- dbcon/joblist/fifo.h | 10 ++++++++++ primitives/linux-port/column.cpp | 3 +-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/dbcon/joblist/fifo.h b/dbcon/joblist/fifo.h index a6bd286da..6a0fca56d 100644 --- a/dbcon/joblist/fifo.h +++ b/dbcon/joblist/fifo.h @@ -345,17 +345,27 @@ void FIFO::signalPs() template inline bool FIFO::next(uint64_t id, element_t *out) { + base::mutex.lock(); fConsumptionStarted = true; if (cpos[id] >= fMaxElements) + { + base::mutex.unlock(); if (!waitForSwap(id)) return false; + base::mutex.lock(); + } *out = cBuffer[cpos[id]++]; #ifndef ONE_CS if (cpos[id] == fMaxElements) + { + base::mutex.unlock(); signalPs(); + return true; + } #endif + base::mutex.unlock(); return true; } diff --git a/primitives/linux-port/column.cpp b/primitives/linux-port/column.cpp index cdb70ef67..bed121517 100644 --- a/primitives/linux-port/column.cpp +++ b/primitives/linux-port/column.cpp @@ -65,8 +65,6 @@ inline uint64_t order_swap(uint64_t x) template inline string fixChar(int64_t intval); -idb_regex_t placeholderRegex; - template inline int compareBlock( const void * a, const void * b ) { @@ -954,6 +952,7 @@ inline void p_Col_ridArray(NewColRequestHeader *in, uint16_t *ridArray=0; uint8_t *in8 = reinterpret_cast(in); const uint8_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + W; + idb_regex_t placeholderRegex; 
placeholderRegex.used = false; From cc474f429cff33254482c8616d74858351b81c9f Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Wed, 11 Jul 2018 20:57:17 +0300 Subject: [PATCH 081/123] MCOL-1510 GROUP BY supports aggregates as arguments in string functions. --- dbcon/mysql/ha_calpont_execplan.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index a7c9cd7bf..fad5747fc 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -2887,7 +2887,7 @@ ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupp if (func_name == "+" || func_name == "-" || func_name == "*" || func_name == "/" ) return buildArithmeticColumn(ifp, gwi, nonSupport, pushdownHand); else - return buildFunctionColumn(ifp, gwi, nonSupport); + return buildFunctionColumn(ifp, gwi, nonSupport, pushdownHand); } case Item::SUM_FUNC_ITEM: @@ -3410,7 +3410,7 @@ ReturnedColumn* buildFunctionColumn( return NULL; } - ReturnedColumn* rc = buildReturnedColumn(ifp->arguments()[i], gwi, nonSupport); + ReturnedColumn* rc = buildReturnedColumn(ifp->arguments()[i], gwi, nonSupport, pushdownHand); // MCOL-1510 It must be a temp table field, so find the corresponding column. 
if (pushdownHand @@ -8861,7 +8861,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro redo = true; // @bug 1706 String funcStr; - ifp->print(&funcStr, QT_INFINIDB); + //ifp->print(&funcStr, QT_INFINIDB); gwi.selectCols.push_back(string(funcStr.c_ptr()) + " `" + escapeBackTick(ifp->name) + "`"); // clear the error set by buildFunctionColumn gwi.fatalParseError = false; @@ -9900,7 +9900,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro sel_query += ", "; } - select_query.replace(lower_select_query.find("select *"), string("select *").length(), sel_query); + //select_query.replace(lower_select_query.find("select *"), string("select *").length(), sel_query); } else { From e1d0916b6c4bd6f5d913f31fd1f6b58d8739c523 Mon Sep 17 00:00:00 2001 From: David Hill Date: Wed, 11 Jul 2018 15:48:04 -0500 Subject: [PATCH 082/123] add in amazon quick installer --- oamapps/postConfigure/CMakeLists.txt | 10 +- oamapps/postConfigure/postConfigure.cpp | 149 +++++++++++++++--- .../postConfigure/quick_installer_amazon.sh | 80 ++++++++++ .../quick_installer_multi_server.sh | 24 +-- 4 files changed, 224 insertions(+), 39 deletions(-) create mode 100644 oamapps/postConfigure/quick_installer_amazon.sh diff --git a/oamapps/postConfigure/CMakeLists.txt b/oamapps/postConfigure/CMakeLists.txt index cfe3d700e..4bdbadd1a 100644 --- a/oamapps/postConfigure/CMakeLists.txt +++ b/oamapps/postConfigure/CMakeLists.txt @@ -37,13 +37,13 @@ install(TARGETS getMySQLpw DESTINATION ${ENGINE_BINDIR} COMPONENT platform) ########### next target ############### -set(amazonInstaller_SRCS amazonInstaller.cpp helpers.cpp) +#set(amazonInstaller_SRCS amazonInstaller.cpp helpers.cpp) -add_executable(amazonInstaller ${amazonInstaller_SRCS}) +#add_executable(amazonInstaller ${amazonInstaller_SRCS}) -target_link_libraries(amazonInstaller ${ENGINE_LDFLAGS} readline ncurses ${SNMP_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS})
+#target_link_libraries(amazonInstaller ${ENGINE_LDFLAGS} readline ncurses ${SNMP_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS}) -install(TARGETS amazonInstaller DESTINATION ${ENGINE_BINDIR} COMPONENT platform) +#install(TARGETS amazonInstaller DESTINATION ${ENGINE_BINDIR} COMPONENT platform) ########### next target ############### @@ -59,5 +59,5 @@ install(TARGETS mycnfUpgrade DESTINATION ${ENGINE_BINDIR} COMPONENT platform) ########### next target ############### -install(PROGRAMS quick_installer_single_server.sh quick_installer_multi_server.sh +install(PROGRAMS quick_installer_single_server.sh quick_installer_multi_server.sh quick_installer_amazon.sh DESTINATION ${ENGINE_BINDIR} COMPONENT platform) diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 5e3fdd964..98227da9d 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -189,6 +189,7 @@ bool nonDistribute = false; bool nonDistributeFlag = false; bool single_server_quick_install = false; bool multi_server_quick_install = false; +bool amazon_quick_install = false; string DataFileEnvFile; @@ -296,7 +297,7 @@ int main(int argc, char *argv[]) cout << " Enter one of the options within [], if available, or" << endl; cout << " Enter a new value" << endl << endl; cout << endl; - cout << "Usage: postConfigure [-h][-c][-u][-p][-qs][-qm][-port][-i][-n][-d][-sn][-pm-ip-addrs][-um-ip-addrs]" << endl; + cout << "Usage: postConfigure [-h][-c][-u][-p][-qs][-qm][-qa][-port][-i][-n][-d][-sn][-pm-ip-addrs][-um-ip-addrs][-pm-count][-um-count]" << endl; cout << " -h Help" << endl; cout << " -c Config File to use to extract configuration data, default is Columnstore.xml.rpmsave" << endl; cout << " -u Upgrade, Install using the Config File from -c, default to Columnstore.xml.rpmsave" << endl; @@ -323,6 +324,11 @@ int main(int argc, char *argv[]) multi_server_quick_install = true; noPrompting = true; } + else if( string("-qa") == 
argv[i] ) + { + amazon_quick_install = true; + noPrompting = true; + } else if( string("-f") == argv[i] ) nodeps = "--nodeps"; else if( string("-o") == argv[i] ) @@ -424,11 +430,31 @@ int main(int argc, char *argv[]) exit (1); } umIpAddrs = argv[i]; + } + else if( string("-pm-count") == argv[i] ) + { + i++; + if (i >= argc ) + { + cout << " ERROR: PM-COUNT not provided" << endl; + exit (1); + } + pmNumber = atoi(argv[i]); + } + else if( string("-um-count") == argv[i] ) + { + i++; + if (i >= argc ) + { + cout << " ERROR: UM-COUNT not provided" << endl; + exit (1); + } + umNumber = atoi(argv[i]); } else { cout << " ERROR: Invalid Argument = " << argv[i] << endl; - cout << " Usage: postConfigure [-h][-c][-u][-p][-qs][-qm][-port][-i][-n][-sn][-pm-ip-addrs][-um-ip-addrs]" << endl; + cout << " Usage: postConfigure [-h][-c][-u][-p][-qs][-qm][-qa][-port][-i][-n][-d][-sn][-pm-ip-addrs][-um-ip-addrs][-pm-count][-um-count]" << endl; exit (1); } } @@ -436,14 +462,25 @@ int main(int argc, char *argv[]) //check if quick install multi-server has been given ip address if (multi_server_quick_install) { - if ( umIpAddrs.empty() && pmIpAddrs.empty() || - !umIpAddrs.empty() && pmIpAddrs.empty() ) + if ( ( umIpAddrs.empty() && pmIpAddrs.empty() ) || + ( !umIpAddrs.empty() && pmIpAddrs.empty() )) { cout << " ERROR: Multi-Server option entered, but invalid UM/PM IP addresses were provided, exiting" << endl; exit(1); } } + //check if quick install multi-server has been given ip address + if (amazon_quick_install) + { + if ( ( umNumber == 0 && pmNumber == 0 ) || + ( umNumber != 0 && pmNumber == 0 ) ) + { + cout << " ERROR: Amazon option entered, but invalid UM/PM Counts were provided, exiting" << endl; + exit(1); + } + } + if (installDir[0] != '/') { cout << " ERROR: Install dir '" << installDir << "' is not absolute" << endl; @@ -462,7 +499,7 @@ int main(int argc, char *argv[]) cout << "IMPORTANT: This tool requires to run on the Performance Module #1" << endl; cout << endl; - if 
(!single_server_quick_install || !multi_server_quick_install) + if (!single_server_quick_install || !multi_server_quick_install || !amazon_quick_install) { if (!noPrompting) { cout << "Prompting instructions:" << endl << endl; @@ -723,6 +760,12 @@ int main(int argc, char *argv[]) singleServerInstall = "2"; } + else if (amazon_quick_install) + { + cout << "===== Quick Install Amazon Configuration =====" << endl << endl; + + singleServerInstall = "2"; + } else { cout << "===== Setup System Server Type Configuration =====" << endl << endl; @@ -943,6 +986,42 @@ int main(int argc, char *argv[]) MaxNicID = 1; } + else + { + if (amazon_quick_install) + { + //set configuarion settings for default setup + try { + sysConfig->setConfig(InstallSection, "MySQLRep", "y"); + } + catch(...) + {} + + try { + sysConfig->setConfig(InstallSection, "Cloud", "amazon-vpc"); + } + catch(...) + {} + + if (umNumber == 0 ) + { + // set Server Type Installation to combined + try { + sysConfig->setConfig(InstallSection, "ServerTypeInstall", "2"); + } + catch(...) 
+ {} + } + + if ( !writeConfig(sysConfig) ) + { + cout << "ERROR: Failed trying to update MariaDB ColumnStore System Configuration file" << endl; + exit(1); + } + + MaxNicID = 1; + } + } cout << endl; //cleanup/create local/etc directory @@ -1174,13 +1253,30 @@ int main(int argc, char *argv[]) in.seekg(0, std::ios::end); int size = in.tellg(); - if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not found")) - // not running on amazon with ec2-api-tools + if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not found")) + { + // not running on amazon with ec2-api-tools + if (amazon_quick_install) + { + cout << "ERROR: Amazon Quick Installer was specified, bu the AMazon CLI API packages isnt installed, exiting" << endl; + exit(1); + } + amazonInstall = false; + } else { - if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not installed")) + if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not installed")) + { + // not running on amazon with ec2-api-tools + if (amazon_quick_install) + { + cout << "ERROR: Amazon Quick Installer was specified, bu the AMazon CLI API packages isnt installed, exiting" << endl; + exit(1); + } + amazonInstall = false; + } else amazonInstall = true; } @@ -1216,7 +1312,7 @@ int main(int argc, char *argv[]) amazonInstall = false; try { - sysConfig->setConfig(InstallSection, "Cloud", "disable"); + sysConfig->setConfig(InstallSection, "Cloud", "disable"); } catch(...) 
{}; @@ -1674,6 +1770,7 @@ int main(int argc, char *argv[]) exit(1); continue; } + //update count try { string ModuleCountParm = "ModuleCount" + oam.itoa(i+1); @@ -1929,23 +2026,25 @@ int main(int argc, char *argv[]) //check if need to create instance or user enter ID string create = "y"; - while(true) + if ( !amazon_quick_install ) { - pcommand = callReadline("Create Instance for " + newModuleName + " [y,n] (y) > "); - if (pcommand) + while(true) { - if (strlen(pcommand) > 0) create = pcommand; - callFree(pcommand); + pcommand = callReadline("Create Instance for " + newModuleName + " [y,n] (y) > "); + if (pcommand) + { + if (strlen(pcommand) > 0) create = pcommand; + callFree(pcommand); + } + if ( create == "y" || create == "n" ) + break; + else + cout << "Invalid Entry, please enter 'y' for yes or 'n' for no" << endl; + create = "y"; + if ( noPrompting ) + exit(1); } - if ( create == "y" || create == "n" ) - break; - else - cout << "Invalid Entry, please enter 'y' for yes or 'n' for no" << endl; - create = "y"; - if ( noPrompting ) - exit(1); } - if ( create == "y" ) { ModuleIP moduleip; @@ -2498,7 +2597,7 @@ int main(int argc, char *argv[]) string dbrootList; - if (multi_server_quick_install) + if (multi_server_quick_install || amazon_quick_install) { dbrootList = oam.itoa(moduleID); } @@ -3609,7 +3708,7 @@ bool checkSaveConfigFile() //check if Columnstore.xml.rpmsave exist ifstream File (oldFileName.c_str()); if (!File) { - if (single_server_quick_install || multi_server_quick_install) + if (single_server_quick_install || multi_server_quick_install || amazon_quick_install) { return true; } @@ -3621,7 +3720,7 @@ bool checkSaveConfigFile() } else { - if (single_server_quick_install || multi_server_quick_install) + if (single_server_quick_install || multi_server_quick_install || amazon_quick_install) { cout << endl << "Quick Install is for fresh installs only, '" + oldFileName + "' exist, exiting" << endl; exit(1); diff --git 
a/oamapps/postConfigure/quick_installer_amazon.sh b/oamapps/postConfigure/quick_installer_amazon.sh new file mode 100644 index 000000000..395a3bf30 --- /dev/null +++ b/oamapps/postConfigure/quick_installer_amazon.sh @@ -0,0 +1,80 @@ +#!/bin/bash +# +# $Id: quick_installer_amazon.sh 3705 2018-07-07 19:47:20Z dhill $ +# +# Poddst- Quick Installer for Amazon MariaDB Columnstore + +pmCount="" +umCount="" +systemName="" + +for arg in "$@"; do + if [ `expr -- "$arg" : '--pm-count='` -eq 11 ]; then + pmCount="`echo $arg | awk -F= '{print $2}'`" + elif [ `expr -- "$arg" : '--um-count='` -eq 11 ]; then + umCount="`echo $arg | awk -F= '{print $2}'`" + elif [ `expr -- "$arg" : '--systemName='` -eq 13 ]; then + systemName="`echo $arg | awk -F= '{print $2}'`" + systemName="-sn "$systemName + elif [ `expr -- "$arg" : '--dist-install'` -eq 14 ]; then + nonDistrubutedInstall=" " + elif [ `expr -- "$arg" : '--help'` -eq 6 ]; then + echo "Usage ./quick_installer_amazon.sh [OPTION]" + echo "" + echo "Quick Installer for an Amazon MariaDB ColumnStore Install" + echo "This requires to be run on a MariaDB ColumnStore AMI" + echo "" + echo "Performace Module (pm) number is required" + echo "User Module (um) number is option" + echo "When only pm counts provided, system is combined setup" + echo "When both pm/um counts provided, system is seperate setup" + echo + echo "--pm-count=x Number of pm instances to create" + echo "--um-count=x Number of um instances to create, optional" + echo "--system-name=nnnn System Name, optional" + echo "" + else + echo "./quick_installer_amazon.sh: unknown argument: $arg, enter --help for help" 1>&2 + exit 1 + fi +done + +if [[ $pmCount = "" ]]; then + echo "" + echo "Performace Module (pm) count is required, exiting" + exit 1 +else + if [[ $umCount = "" ]]; then + echo "" + echo "NOTE: Performing a Multi-Server Combined install with um/pm running on some server" + echo"" + else + echo "" + echo "NOTE: Performing a Multi-Server Seperate install with um and 
pm running on seperate servers" + echo"" + fi +fi + +if [[ $HOME = "/root" ]]; then + echo "${bold}Run post-install script${normal}" + echo "" + /usr/local/mariadb/columnstore/bin/post-install + echo "${bold}Run postConfigure script${normal}" + echo "" + if [[ $umCount = "" ]]; then + /usr/local/mariadb/columnstore/bin/postConfigure -qa -pm-count $pmCount $systemName + else + /usr/local/mariadb/columnstore/bin/postConfigure -qa -pm-count $pmCount -um-count $umCount $systemName + fi +else + echo "${bold}Run post-install script${normal}" + echo "" + $HOME/mariadb/columnstore/bin/post-install --installdir=$HOME/mariadb/columnstore + echo "${bold}Run postConfigure script${normal}" + echo "" + if [[ $umCount = "" ]]; then + $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qa -pm-count $pmCount $systemName + else + $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qa -pm-count $pmCount -um-count $umCount $systemName + fi +fi diff --git a/oamapps/postConfigure/quick_installer_multi_server.sh b/oamapps/postConfigure/quick_installer_multi_server.sh index 97aa670bc..1f4e29cd0 100644 --- a/oamapps/postConfigure/quick_installer_multi_server.sh +++ b/oamapps/postConfigure/quick_installer_multi_server.sh @@ -7,32 +7,38 @@ pmIpAddrs="" umIpAddrs="" nonDistrubutedInstall="-n" +systemName="" for arg in "$@"; do if [ `expr -- "$arg" : '--pm-ip-addresses='` -eq 18 ]; then pmIpAddrs="`echo $arg | awk -F= '{print $2}'`" elif [ `expr -- "$arg" : '--um-ip-addresses='` -eq 18 ]; then umIpAddrs="`echo $arg | awk -F= '{print $2}'`" + elif [ `expr -- "$arg" : '--systemName='` -eq 13 ]; then + systemName="`echo $arg | awk -F= '{print $2}'`" + systemName="-sn "$systemName elif [ `expr -- "$arg" : '--dist-install'` -eq 14 ]; then nonDistrubutedInstall=" " elif [ `expr -- "$arg" : '--help'` -eq 6 ]; then echo "Usage ./quick_installer_multi_server.sh [OPTION]" echo "" echo "Quick Installer for a Multi Server MariaDB ColumnStore Install" + echo "" 
echo "Defaults to non-distrubuted install, meaning MariaDB Columnstore" echo "needs to be preinstalled on all nodes in the system" echo "" - echo "Performace Module (pm) IP addresses required" - echo "User Module (um) IP addresses option" + echo "Performace Module (pm) IP addresses are required" + echo "User Module (um) IP addresses are option" echo "When only pm IP addresses provided, system is combined setup" echo "When both pm/um IP addresses provided, system is seperate setup" echo echo "--pm-ip-addresses=xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx" - echo "--um-ip-addresses=xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx" + echo "--um-ip-addresses=xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx, optional" echo "--dist-install Use Distributed Install Option" + echo "--system-name=nnnn System Name, optional" echo "" else - echo "quick_installer_multi_server.sh: unknown argument: $arg, use --help for help" 1>&2 + echo "quick_installer_multi_server.sh: unknown argument: $arg, enter --help for help" 1>&2 exit 1 fi done @@ -58,11 +64,11 @@ if [[ $HOME = "/root" ]]; then echo "" /usr/local/mariadb/columnstore/bin/post-install echo "${bold}Run postConfigure script${normal}" - echo "" + echo "" if [[ $umIpAddrs = "" ]]; then - /usr/local/mariadb/columnstore/bin/postConfigure -qm -pm-ip-addrs $pmIpAddrs $nonDistrubutedInstall + /usr/local/mariadb/columnstore/bin/postConfigure -qm -pm-ip-addrs $pmIpAddrs $nonDistrubutedInstall $systemName else - /usr/local/mariadb/columnstore/bin/postConfigure -qm -pm-ip-addrs $pmIpAddrs -um-ip-addrs $umIpAddrs $nonDistrubutedInstall + /usr/local/mariadb/columnstore/bin/postConfigure -qm -pm-ip-addrs $pmIpAddrs -um-ip-addrs $umIpAddrs $nonDistrubutedInstall $systemName fi else echo "${bold}Run post-install script${normal}" @@ -71,8 +77,8 @@ else echo "${bold}Run postConfigure script${normal}" echo "" if [[ $umIpAddrs = "" ]]; then - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs $nonDistrubutedInstall + 
+ $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs $nonDistrubutedInstall $systemName else - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs -um-ip-addrs $umIpAddrs $nonDistrubutedInstall + $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs -um-ip-addrs $umIpAddrs $nonDistrubutedInstall $systemName fi fi From a62a2e321ec3dece0fcfa85cce776875db4e7556 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Thu, 12 Jul 2018 14:25:46 +0300 Subject: [PATCH 083/123] MCOL-1510 CS prints IDB-1001 error when aggregates used in non-supported functions, e.g. NOT(sum(i)). --- dbcon/mysql/ha_calpont_execplan.cpp | 37 ++++++++++++++++++++++------- dbcon/mysql/ha_calpont_impl_if.h | 4 ++-- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index fad5747fc..889a9d77f 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -191,7 +191,7 @@ bool nonConstFunc(Item_func* ifp) return false; } -ReturnedColumn* findCorrespTempField(Item_ref* item, gp_walk_info& gwi) +ReturnedColumn* findCorrespTempField(Item_ref* item, gp_walk_info& gwi, bool clone = true) { ReturnedColumn* result = NULL; uint32_t i; @@ -201,7 +201,10 @@ ReturnedColumn* findCorrespTempField(Item_ref* item, gp_walk_info& gwi) gwi.returnedCols[i]->alias().c_str() && !strcasecmp(item->ref[0]->name, gwi.returnedCols[i]->alias().c_str())) { - result = gwi.returnedCols[i]->clone(); + if (clone) + result = gwi.returnedCols[i]->clone(); + else + result = gwi.returnedCols[i].get(); break; } } @@ -5455,7 +5458,10 @@ void gp_walk(const Item* item, void* arg) * the involved item_fields to the passed in vector. It's used in parsing * functions or arithmetic expressions for vtable post process.
*/ -void parse_item (Item* item, vector& field_vec, bool& hasNonSupportItem, uint16_t& parseInfo) +void parse_item (Item* item, vector& field_vec, + bool& hasNonSupportItem, + uint16_t& parseInfo, + gp_walk_info* gwi) { Item::Type itype = item->type(); @@ -5493,7 +5499,7 @@ void parse_item (Item* item, vector& field_vec, bool& hasNonSupport } for (uint32_t i = 0; i < isp->argument_count(); i++) - parse_item(isp->arguments()[i], field_vec, hasNonSupportItem, parseInfo); + parse_item(isp->arguments()[i], field_vec, hasNonSupportItem, parseInfo, gwi); // parse_item(sfitempp[i], field_vec, hasNonSupportItem, parseInfo); break; @@ -5538,8 +5544,20 @@ void parse_item (Item* item, vector& field_vec, bool& hasNonSupport } else if ((*(ref->ref))->type() == Item::FIELD_ITEM) { - Item_field* ifp = reinterpret_cast(*(ref->ref)); - field_vec.push_back(ifp); + // MCOL-1510. This could be a non-supported function + // argument in form of a temp_table_field, so check + // and set hasNonSupportItem if it is so. + ReturnedColumn* rc = NULL; + if (gwi) + rc = findCorrespTempField(ref, *gwi, false); + + if (!rc) + { + Item_field* ifp = reinterpret_cast(*(ref->ref)); + field_vec.push_back(ifp); + } + else + hasNonSupportItem = true; break; } else if ((*(ref->ref))->type() == Item::FUNC_ITEM) @@ -8771,7 +8789,10 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro { hasNonSupportItem = false; uint32_t before_size = funcFieldVec.size(); - parse_item(ifp, funcFieldVec, hasNonSupportItem, parseInfo); + // MCOL-1510 Use gwi pointer here to catch funcs with + // not supported aggregate args in projections, + // e.g. NOT(SUM(i)). 
+ parse_item(ifp, funcFieldVec, hasNonSupportItem, parseInfo, &gwi); uint32_t after_size = funcFieldVec.size(); // group by func and func in subquery can not be post processed @@ -8861,7 +8882,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro redo = true; // @bug 1706 String funcStr; - //ifp->print(&funcStr, QT_INFINIDB); + ifp->print(&funcStr, QT_INFINIDB); gwi.selectCols.push_back(string(funcStr.c_ptr()) + " `" + escapeBackTick(ifp->name) + "`"); // clear the error set by buildFunctionColumn gwi.fatalParseError = false; diff --git a/dbcon/mysql/ha_calpont_impl_if.h b/dbcon/mysql/ha_calpont_impl_if.h index 40e746917..b7e668b2d 100644 --- a/dbcon/mysql/ha_calpont_impl_if.h +++ b/dbcon/mysql/ha_calpont_impl_if.h @@ -329,7 +329,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, execplan::SCSEP& cse void setError(THD* thd, uint32_t errcode, const std::string errmsg, gp_walk_info* gwi); void setError(THD* thd, uint32_t errcode, const std::string errmsg); void gp_walk(const Item* item, void* arg); -void parse_item (Item* item, std::vector& field_vec, bool& hasNonSupportItem, uint16& parseInfo); +void parse_item (Item* item, std::vector& field_vec, bool& hasNonSupportItem, uint16& parseInfo, gp_walk_info* gwip = NULL); const std::string bestTableName(const Item_field* ifp); bool isInfiniDB(TABLE* table_ptr); @@ -348,7 +348,7 @@ void addIntervalArgs(Item_func* ifp, funcexp::FunctionParm& functionParms); void castCharArgs(Item_func* ifp, funcexp::FunctionParm& functionParms); void castDecimalArgs(Item_func* ifp, funcexp::FunctionParm& functionParms); void castTypeArgs(Item_func* ifp, funcexp::FunctionParm& functionParms); -void parse_item (Item* item, std::vector& field_vec, bool& hasNonSupportItem, uint16& parseInfo); +//void parse_item (Item* item, std::vector& field_vec, bool& hasNonSupportItem, uint16& parseInfo); bool isPredicateFunction(Item* item, gp_walk_info* gwip); execplan::ParseTree* 
buildRowPredicate(execplan::RowColumn* lhs, execplan::RowColumn* rhs, std::string predicateOp); bool buildRowColumnFilter(gp_walk_info* gwip, execplan::RowColumn* rhs, execplan::RowColumn* lhs, Item_func* ifp); From fb8aab959dc82c64e61ac7030e88d333e4fa79a9 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 12 Jul 2018 15:13:43 +0100 Subject: [PATCH 084/123] MCOL-1433 Fix some functions for TIME Fixes the following: * CAST() (as DATE/DATETIME) * DATE() * DATE_FORMAT() * MAKEDATE() * NULLIF() * TIMEDIFF() * TO_DAYS() / DATEDIFF() --- utils/funcexp/func_cast.cpp | 68 ++++++++++++++++++++++++++++++ utils/funcexp/func_date.cpp | 11 +++++ utils/funcexp/func_date_format.cpp | 28 ++++++++++++ utils/funcexp/func_makedate.cpp | 15 +++++++ utils/funcexp/func_nullif.cpp | 3 +- utils/funcexp/func_timediff.cpp | 6 +++ utils/funcexp/func_to_days.cpp | 4 ++ 7 files changed, 134 insertions(+), 1 deletion(-) diff --git a/utils/funcexp/func_cast.cpp b/utils/funcexp/func_cast.cpp index 3542e341d..b396c45ea 100644 --- a/utils/funcexp/func_cast.cpp +++ b/utils/funcexp/func_cast.cpp @@ -589,6 +589,33 @@ int32_t Func_cast_date::getDateIntVal(rowgroup::Row& row, { return parm[0]->data()->getDateIntVal(row, isNull); } + case execplan::CalpontSystemCatalog::TIME: + { + int64_t val1; + string value = ""; + DateTime aDateTime = static_cast(nowDatetime()); + Time aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + if ((aTime.hour < 0) || (aTime.is_neg)) + { + aTime.hour = -abs(aTime.hour); + aTime.minute = -abs(aTime.minute); + aTime.second = -abs(aTime.second); + aTime.msecond = -abs(aTime.msecond); + } + + aDateTime.hour = 0; + aDateTime.minute = 0; + aDateTime.second = 0; + aDateTime.msecond = 0; + val1 = addTime(aDateTime, aTime); + value = dataconvert::DataConvert::datetimeToString(val1); + value = value.substr(0, 10); + return dataconvert::DataConvert::stringToDate(value); + break; + } + + default: { @@ -680,6 +707,27 @@ int64_t 
Func_cast_date::getDatetimeIntVal(rowgroup::Row& row, val1.msecond = 0; return *(reinterpret_cast(&val1)); } + case CalpontSystemCatalog::TIME: + { + DateTime aDateTime = static_cast(nowDatetime()); + Time aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + if ((aTime.hour < 0) || (aTime.is_neg)) + { + aTime.hour = -abs(aTime.hour); + aTime.minute = -abs(aTime.minute); + aTime.second = -abs(aTime.second); + aTime.msecond = -abs(aTime.msecond); + } + + aDateTime.hour = 0; + aDateTime.minute = 0; + aDateTime.second = 0; + aDateTime.msecond = 0; + val = addTime(aDateTime, aTime); + return val; + } + default: { @@ -814,6 +862,26 @@ int64_t Func_cast_datetime::getDatetimeIntVal(rowgroup::Row& row, return parm[0]->data()->getDatetimeIntVal(row, isNull); } + case CalpontSystemCatalog::TIME: + { + DateTime aDateTime = static_cast(nowDatetime()); + Time aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aDateTime.hour = 0; + aDateTime.minute = 0; + aDateTime.second = 0; + aDateTime.msecond = 0; + if ((aTime.hour < 0) || (aTime.is_neg)) + { + aTime.hour = -abs(aTime.hour); + aTime.minute = -abs(aTime.minute); + aTime.second = -abs(aTime.second); + aTime.msecond = -abs(aTime.msecond); + } + aTime.day = 0; + return addTime(aDateTime, aTime); + break; + } + default: { isNull = true; diff --git a/utils/funcexp/func_date.cpp b/utils/funcexp/func_date.cpp index d0bc30942..7fc990ab6 100644 --- a/utils/funcexp/func_date.cpp +++ b/utils/funcexp/func_date.cpp @@ -82,6 +82,17 @@ int64_t Func_date::getIntVal(rowgroup::Row& row, aDateTime = static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); aTime.day = 0; + aDateTime.hour = 0; + aDateTime.minute = 0; + aDateTime.second = 0; + aDateTime.msecond = 0; + if ((aTime.hour < 0) || (aTime.is_neg)) + { + aTime.hour = -abs(aTime.hour); + aTime.minute = -abs(aTime.minute); + aTime.second = -abs(aTime.second); + aTime.msecond = -abs(aTime.msecond); + } val = addTime(aDateTime, aTime); value = 
dataconvert::DataConvert::datetimeToString(val); value = value.substr(0, 10); diff --git a/utils/funcexp/func_date_format.cpp b/utils/funcexp/func_date_format.cpp index 033ceda02..9a25cb941 100644 --- a/utils/funcexp/func_date_format.cpp +++ b/utils/funcexp/func_date_format.cpp @@ -269,6 +269,34 @@ string Func_date_format::getStrVal(rowgroup::Row& row, dt.msecond = (uint32_t)((val & 0xfffff)); break; + case CalpontSystemCatalog::TIME: + { + DateTime aDateTime = static_cast(nowDatetime()); + Time aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aTime.day = 0; + aDateTime.hour = 0; + aDateTime.minute = 0; + aDateTime.second = 0; + aDateTime.msecond = 0; + if ((aTime.hour < 0) || (aTime.is_neg)) + { + aTime.hour = -abs(aTime.hour); + aTime.minute = -abs(aTime.minute); + aTime.second = -abs(aTime.second); + aTime.msecond = -abs(aTime.msecond); + } + val = addTime(aDateTime, aTime); + dt.year = (uint32_t)((val >> 48) & 0xffff); + dt.month = (uint32_t)((val >> 44) & 0xf); + dt.day = (uint32_t)((val >> 38) & 0x3f); + dt.hour = (uint32_t)((val >> 32) & 0x3f); + dt.minute = (uint32_t)((val >> 26) & 0x3f); + dt.second = (uint32_t)((val >> 20) & 0x3f); + dt.msecond = (uint32_t)((val & 0xfffff)); + break; + } + + case CalpontSystemCatalog::CHAR: case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::TEXT: diff --git a/utils/funcexp/func_makedate.cpp b/utils/funcexp/func_makedate.cpp index 2a30515f1..948b612de 100644 --- a/utils/funcexp/func_makedate.cpp +++ b/utils/funcexp/func_makedate.cpp @@ -146,11 +146,26 @@ uint64_t makedate(rowgroup::Row& row, break; } + case CalpontSystemCatalog::TIME: + { + std::ostringstream ss; + Time aTime = parm[1]->data()->getTimeIntVal(row, isNull); + ss << aTime.hour << aTime.minute << aTime.second; + dayofyear = ss.str(); + break; + } + default: isNull = true; return 0; } + if (atoi(dayofyear.c_str()) == 0) + { + isNull = true; + return 0; + } + // convert the year to a date in our internal format, then subtract // one since we 
are about to add the day of year back in Date d(year, 1, 1); diff --git a/utils/funcexp/func_nullif.cpp b/utils/funcexp/func_nullif.cpp index a268b0ea1..04a45534a 100644 --- a/utils/funcexp/func_nullif.cpp +++ b/utils/funcexp/func_nullif.cpp @@ -531,7 +531,8 @@ int64_t Func_nullif::getTimeIntVal(rowgroup::Row& row, default: { - isNull = true; + isNull = false; + return exp1; } } diff --git a/utils/funcexp/func_timediff.cpp b/utils/funcexp/func_timediff.cpp index 742e8faf7..d17511f76 100644 --- a/utils/funcexp/func_timediff.cpp +++ b/utils/funcexp/func_timediff.cpp @@ -109,6 +109,12 @@ string Func_timediff::getStrVal(rowgroup::Row& row, int64_t val1 = -1, val2 = -1; bool isDate1 = false, isDate2 = false; + if (type1 != type2) + { + isNull = true; + return ""; + } + switch (type1) { case execplan::CalpontSystemCatalog::DATE: diff --git a/utils/funcexp/func_to_days.cpp b/utils/funcexp/func_to_days.cpp index cc2e3afa2..f16642958 100644 --- a/utils/funcexp/func_to_days.cpp +++ b/utils/funcexp/func_to_days.cpp @@ -91,6 +91,10 @@ int64_t Func_to_days::getIntVal(rowgroup::Row& row, int64_t val; aDateTime = static_cast(nowDatetime()); aTime = parm[0]->data()->getTimeIntVal(row, isNull); + aDateTime.hour = 0; + aDateTime.minute = 0; + aDateTime.second = 0; + aDateTime.msecond = 0; aTime.day = 0; val = addTime(aDateTime, aTime); year = (uint32_t)((val >> 48) & 0xffff); From 42448e90224306d464429fd1245f15989aba3df4 Mon Sep 17 00:00:00 2001 From: David Hill Date: Thu, 12 Jul 2018 09:25:57 -0500 Subject: [PATCH 085/123] add system name option --- oamapps/postConfigure/quick_installer_amazon.sh | 2 +- oamapps/postConfigure/quick_installer_multi_server.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/oamapps/postConfigure/quick_installer_amazon.sh b/oamapps/postConfigure/quick_installer_amazon.sh index 395a3bf30..f867aa5a9 100644 --- a/oamapps/postConfigure/quick_installer_amazon.sh +++ b/oamapps/postConfigure/quick_installer_amazon.sh @@ -13,7 +13,7 @@ 
for arg in "$@"; do pmCount="`echo $arg | awk -F= '{print $2}'`" elif [ `expr -- "$arg" : '--um-count='` -eq 11 ]; then umCount="`echo $arg | awk -F= '{print $2}'`" - elif [ `expr -- "$arg" : '--systemName='` -eq 13 ]; then + elif [ `expr -- "$arg" : '--system-name='` -eq 14 ]; then systemName="`echo $arg | awk -F= '{print $2}'`" systemName="-sn "$systemName elif [ `expr -- "$arg" : '--dist-install'` -eq 14 ]; then diff --git a/oamapps/postConfigure/quick_installer_multi_server.sh b/oamapps/postConfigure/quick_installer_multi_server.sh index 1f4e29cd0..239fca1e9 100644 --- a/oamapps/postConfigure/quick_installer_multi_server.sh +++ b/oamapps/postConfigure/quick_installer_multi_server.sh @@ -14,7 +14,7 @@ for arg in "$@"; do pmIpAddrs="`echo $arg | awk -F= '{print $2}'`" elif [ `expr -- "$arg" : '--um-ip-addresses='` -eq 18 ]; then umIpAddrs="`echo $arg | awk -F= '{print $2}'`" - elif [ `expr -- "$arg" : '--systemName='` -eq 13 ]; then + elif [ `expr -- "$arg" : '--system-name='` -eq 14 ]; then systemName="`echo $arg | awk -F= '{print $2}'`" systemName="-sn "$systemName elif [ `expr -- "$arg" : '--dist-install'` -eq 14 ]; then @@ -34,7 +34,7 @@ for arg in "$@"; do echo echo "--pm-ip-addresses=xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx" echo "--um-ip-addresses=xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx, optional" - echo "--dist-install Use Distributed Install Option" + echo "--dist-install Use Distributed Install, optional" echo "--system-name=nnnn System Name, optional" echo "" else From 16cf8b79cd03b132971cb71d3a76ddc6cc6e666e Mon Sep 17 00:00:00 2001 From: David Hill Date: Thu, 12 Jul 2018 09:30:03 -0500 Subject: [PATCH 086/123] add system name option --- oamapps/postConfigure/quick_installer_amazon.sh | 1 + oamapps/postConfigure/quick_installer_multi_server.sh | 1 + oamapps/postConfigure/quick_installer_single_server.sh | 1 + 3 files changed, 3 insertions(+) mode change 100644 => 100755 oamapps/postConfigure/quick_installer_amazon.sh mode change 100644 => 100755 
oamapps/postConfigure/quick_installer_multi_server.sh mode change 100644 => 100755 oamapps/postConfigure/quick_installer_single_server.sh diff --git a/oamapps/postConfigure/quick_installer_amazon.sh b/oamapps/postConfigure/quick_installer_amazon.sh old mode 100644 new mode 100755 index f867aa5a9..c71dc8920 --- a/oamapps/postConfigure/quick_installer_amazon.sh +++ b/oamapps/postConfigure/quick_installer_amazon.sh @@ -33,6 +33,7 @@ for arg in "$@"; do echo "--um-count=x Number of um instances to create, optional" echo "--system-name=nnnn System Name, optional" echo "" + exit 1 else echo "./quick_installer_amazon.sh: unknown argument: $arg, enter --help for help" 1>&2 exit 1 diff --git a/oamapps/postConfigure/quick_installer_multi_server.sh b/oamapps/postConfigure/quick_installer_multi_server.sh old mode 100644 new mode 100755 index 239fca1e9..25d615d7e --- a/oamapps/postConfigure/quick_installer_multi_server.sh +++ b/oamapps/postConfigure/quick_installer_multi_server.sh @@ -37,6 +37,7 @@ for arg in "$@"; do echo "--dist-install Use Distributed Install, optional" echo "--system-name=nnnn System Name, optional" echo "" + exit 1 else echo "quick_installer_multi_server.sh: unknown argument: $arg, enter --help for help" 1>&2 exit 1 diff --git a/oamapps/postConfigure/quick_installer_single_server.sh b/oamapps/postConfigure/quick_installer_single_server.sh old mode 100644 new mode 100755 index bfd9b81c4..2603bcec8 --- a/oamapps/postConfigure/quick_installer_single_server.sh +++ b/oamapps/postConfigure/quick_installer_single_server.sh @@ -10,6 +10,7 @@ for arg in "$@"; do echo "" echo "Quick Installer for a Single Server MariaDB ColumnStore Install" echo "" + exit 1 else echo "quick_installer_multi_server.sh: ignoring unknown argument: $arg" 1>&2 fi From 400ae5178730f6cd7aa65719345dca8d372c8bbf Mon Sep 17 00:00:00 2001 From: David Hill Date: Thu, 12 Jul 2018 14:16:13 -0500 Subject: [PATCH 087/123] removed amazonInstaller and add 3 quick installers --- cpackEngineRPM.cmake | 
4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpackEngineRPM.cmake b/cpackEngineRPM.cmake index be5df3462..d2b240029 100644 --- a/cpackEngineRPM.cmake +++ b/cpackEngineRPM.cmake @@ -181,7 +181,6 @@ SET(CPACK_RPM_platform_USER_FILELIST "/usr/local/mariadb/columnstore/bin/resourceReport.sh" "/usr/local/mariadb/columnstore/bin/hadoopReport.sh" "/usr/local/mariadb/columnstore/bin/alarmReport.sh" -"/usr/local/mariadb/columnstore/bin/amazonInstaller" "/usr/local/mariadb/columnstore/bin/remote_command_verify.sh" "/usr/local/mariadb/columnstore/bin/disable-rep-columnstore.sh" "/usr/local/mariadb/columnstore/bin/columnstore.service" @@ -217,6 +216,9 @@ SET(CPACK_RPM_platform_USER_FILELIST "/usr/local/mariadb/columnstore/bin/os_detect.sh" "/usr/local/mariadb/columnstore/bin/columnstoreClusterTester.sh" "/usr/local/mariadb/columnstore/bin/mariadb-command-line.sh" +"/usr/local/mariadb/columnstore/bin/quick_installer_single_server.sh" +"/usr/local/mariadb/columnstore/bin/quick_installer_multi_server.sh" +"/usr/local/mariadb/columnstore/bin/quick_installer_amazon.sh" ${ignored}) SET(CPACK_RPM_libs_USER_FILELIST From 1995e927888597f26706badc89e4eee2a26de388 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Tue, 17 Jul 2018 20:17:51 +0300 Subject: [PATCH 088/123] MCOL-1527 CEJ DML statements now return correct number of affected rows. --- dbcon/mysql/ha_calpont_impl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 6cc7da0fb..c1f37c9ac 100755 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -1691,14 +1691,14 @@ uint32_t doUpdateDelete(THD *thd) } else { - thd->set_row_count_func(dmlRowCount); + thd->set_row_count_func(dmlRowCount+thd->get_row_count_func()); } push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, errorMsg.c_str()); } else { // if (dmlRowCount != 0) //Bug 5117. Handling self join. 
- thd->set_row_count_func(dmlRowCount); + thd->set_row_count_func(dmlRowCount+thd->get_row_count_func()); //cout << " error status " << ci->rc << " and rowcount = " << dmlRowCount << endl; From e05250915b4e578a88828ece32d7996450a03f6f Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 19 Jul 2018 09:29:06 +0100 Subject: [PATCH 089/123] MCOL-1545 Remove getPool call. It is completely unused and it breaks compiling in GCC 8.1 --- utils/common/simpleallocator.h | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/common/simpleallocator.h b/utils/common/simpleallocator.h index 36a8678c6..4d1dce71b 100644 --- a/utils/common/simpleallocator.h +++ b/utils/common/simpleallocator.h @@ -119,7 +119,6 @@ class SimpleAllocator void construct(pointer ptr, const T& val) { new ((void *)ptr) T(val); } void destroy(pointer ptr) { ptr->T::~T(); } - SimplePool* getPool() { return fPool; } void setPool(SimplePool* pool) { fPool = pool; } boost::shared_ptr fPool; From 979d00a679266d95f3ca449d03310a5036493a1b Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Mon, 23 Jul 2018 10:40:18 +0100 Subject: [PATCH 090/123] MCOL-1579 Remove chmod of /dev/shm We appear to chmod /dev/shm as a workaround to a CentOS 7 bug that was fixed last year which accidentally set it to 755. If a user has /dev/shm locked down we should get them to fix it rather than modifying it ourselves. The code before this fixed changed a root install to 755 for /dev/shm which instantly broke anything using mmap() with MAP_SHARED as an unprivileged user. 
--- oam/install_scripts/post-install | 1 - procmon/main.cpp | 7 ------- procmon/processmonitor.cpp | 7 ------- 3 files changed, 15 deletions(-) diff --git a/oam/install_scripts/post-install b/oam/install_scripts/post-install index 4eaa28dda..5a61a9074 100755 --- a/oam/install_scripts/post-install +++ b/oam/install_scripts/post-install @@ -247,7 +247,6 @@ else $SUDO chmod 777 /tmp $installdir/bin/syslogSetup.sh --installdir=$installdir install > /tmp/syslog_install.log 2>&1 $SUDO chown $user:$user $installdir/etc/Columnstore.xml - $SUDO chmod -R 777 /dev/shm $SUDO mkdir /var/lock/subsys > /dev/null 2>&1 $SUDO chmod 777 /var/lock/subsys > /dev/null 2>&1 $SUDO rm -f /var/lock/subsys/mysql-Columnstore diff --git a/procmon/main.cpp b/procmon/main.cpp index b4e23a6e1..096cffcee 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -131,13 +131,6 @@ int main(int argc, char **argv) if (p && *p) USER = p; - // change permissions on /dev/shm - if ( !rootUser) - { - string cmd = "sudo chmod 777 /dev/shm >/dev/null 2>&1"; - system(cmd.c_str()); - } - // get and set locale language string systemLang = "C"; diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 91f78e640..b0e0fc07f 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -988,13 +988,6 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO int requestStatus = oam::API_SUCCESS; log.writeLog(__LINE__, "MSG RECEIVED: Start All process request..."); - // change permissions on /dev/shm - string cmd = "chmod 755 /dev/shm >/dev/null 2>&1"; - if ( !rootUser) - cmd = "sudo chmod 777 /dev/shm >/dev/null 2>&1"; - - system(cmd.c_str()); - //start the mysqld daemon try { oam.actionMysqlCalpont(MYSQL_START); From db4279dac7558ed4e99ed3698d10363a798c108d Mon Sep 17 00:00:00 2001 From: David Hall Date: Mon, 23 Jul 2018 17:32:38 -0500 Subject: [PATCH 091/123] MCOL-1535 caseOperationType adjusted for new simple case order --- utils/funcexp/func_case.cpp | 70 
+++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 38 deletions(-) diff --git a/utils/funcexp/func_case.cpp b/utils/funcexp/func_case.cpp index 2b5ae58c2..d3f0aa0cf 100644 --- a/utils/funcexp/func_case.cpp +++ b/utils/funcexp/func_case.cpp @@ -259,17 +259,10 @@ CalpontSystemCatalog::ColType caseOperationType(FunctionParm& fp, CalpontSystemCatalog::ColType& resultType, bool simpleCase) { - FunctionParm::size_type n = fp.size(); - - if (simpleCase) // simple case has an expression - n -= 1; // remove expression from count of expression_i + result_i - bool hasElse = ((n % 2) != 0); // if 1, then ELSE exist - if (hasElse) - --n; // n now is an even number + uint64_t simple = simpleCase ? 1 : 0; + bool hasElse = (((fp.size()-simple) % 2) != 0); // if 1, then ELSE exist uint64_t parmCount = hasElse ? (fp.size() - 2) : (fp.size() - 1); - uint64_t whereCount = hasElse ? (fp.size() - 2 + simpleCase) / 2 : (fp.size() - 1) / 2 + simpleCase; - - idbassert((n % 2) == 0); + uint64_t whereCount = hasElse ? (fp.size() - 2 + simple) / 2 : (fp.size() - 1) / 2 + simple; bool allStringO = true; bool allStringR = true; @@ -281,34 +274,25 @@ CalpontSystemCatalog::ColType caseOperationType(FunctionParm& fp, bool operation = true; for (uint64_t i = 0; i <= parmCount; i++) { - // operation or result type - operation = ((i > 0) && (i <= whereCount)); - - // the result type of ELSE, if exists. - if (i == n) + // for SimpleCase, we return the type of the case expression, + // which will always be in position 0. 
+ if (i == 0 && simpleCase) { - if (!hasElse) - break; - - if (simpleCase) - { - // the case expression - if (fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::CHAR && - fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::TEXT && - fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::VARCHAR) - { - PredicateOperator op; - op.setOpType(oct, fp[i]->data()->resultType()); - allStringO = false; - oct = op.operationType(); - } - - i += 1; - } - - operation = false; + if (fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::CHAR && + fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::TEXT && + fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::VARCHAR) + { + PredicateOperator op; + op.setOpType(oct, fp[i]->data()->resultType()); + allStringO = false; + oct = op.operationType(); + } + i += 1; } + // operation or result type + operation = ((i > 0+simple) && (i <= whereCount)); + if (fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::CHAR && fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::TEXT && fp[i]->data()->resultType().colDataType != CalpontSystemCatalog::VARCHAR) @@ -317,9 +301,12 @@ CalpontSystemCatalog::ColType caseOperationType(FunctionParm& fp, PredicateOperator op; if (operation) { - op.setOpType(oct, fp[i]->data()->resultType()); - allStringO = false; - oct = op.operationType(); + if (!simpleCase) + { + op.setOpType(oct, fp[i]->data()->resultType()); + allStringO = false; + oct = op.operationType(); + } } // If any parm is of string type, the result type should be string. 
(same as if) @@ -395,6 +382,13 @@ bool Func_simple_case::getBoolVal(Row& row, if (isNull) return joblist::BIGINTNULL; + ParseTree* lop = parm[i]->left(); + ParseTree* rop = parm[i]->right(); + if (lop && rop) + { + return (reinterpret_cast(parm[i]->data()))->getBoolVal(row, isNull, lop, rop); + } + return parm[i]->data()->getBoolVal(row, isNull); } From ec9069853e3aa3e46ea54984cbb982fbc62d0528 Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 24 Jul 2018 09:10:46 -0500 Subject: [PATCH 092/123] MCOL-1531 Remove commented fields from equality functions --- dbcon/execplan/aggregatecolumn.cpp | 2 -- dbcon/execplan/arithmeticcolumn.cpp | 4 ---- dbcon/execplan/constantcolumn.cpp | 2 -- dbcon/execplan/functioncolumn.cpp | 2 -- dbcon/execplan/returnedcolumn.cpp | 9 +-------- dbcon/execplan/simplecolumn.cpp | 6 ------ 6 files changed, 1 insertion(+), 24 deletions(-) diff --git a/dbcon/execplan/aggregatecolumn.cpp b/dbcon/execplan/aggregatecolumn.cpp index b6262f998..da50c983d 100644 --- a/dbcon/execplan/aggregatecolumn.cpp +++ b/dbcon/execplan/aggregatecolumn.cpp @@ -260,8 +260,6 @@ bool AggregateColumn::operator==(const AggregateColumn& t) const } else if (fFunctionParms.get() != NULL || t.fFunctionParms.get() != NULL) return false; - //if (fAlias != t.fAlias) - // return false; if (fTableAlias != t.fTableAlias) return false; if (fData != t.fData) diff --git a/dbcon/execplan/arithmeticcolumn.cpp b/dbcon/execplan/arithmeticcolumn.cpp index 2c2eb5ace..4306f3191 100644 --- a/dbcon/execplan/arithmeticcolumn.cpp +++ b/dbcon/execplan/arithmeticcolumn.cpp @@ -345,10 +345,6 @@ bool ArithmeticColumn::operator==(const ArithmeticColumn& t) const } else if (fExpression != NULL || t.fExpression != NULL) return false; -// if (fAlias != t.fAlias) -// return false; -// if (fTableAlias != t.fTableAlias) -// return false; if (fData != t.fData) return false; return true; diff --git a/dbcon/execplan/constantcolumn.cpp b/dbcon/execplan/constantcolumn.cpp index fadd2dfc9..04caafc9c 100644 
--- a/dbcon/execplan/constantcolumn.cpp +++ b/dbcon/execplan/constantcolumn.cpp @@ -316,8 +316,6 @@ bool ConstantColumn::operator==(const ConstantColumn& t) const return false; if (fType != t.fType) return false; -// if (fAlias != t.fAlias) -// return false; if (fData != t.fData) return false; if (fReturnAll != t.fReturnAll) diff --git a/dbcon/execplan/functioncolumn.cpp b/dbcon/execplan/functioncolumn.cpp index 2316557ac..f7467f40f 100644 --- a/dbcon/execplan/functioncolumn.cpp +++ b/dbcon/execplan/functioncolumn.cpp @@ -309,8 +309,6 @@ bool FunctionColumn::operator==(const FunctionColumn& t) const ++it, ++it2) if (**it != **it2) return false; -// if (fAlias != t.fAlias) -// return false; if (fTableAlias != t.fTableAlias) return false; if (fData != t.fData) diff --git a/dbcon/execplan/returnedcolumn.cpp b/dbcon/execplan/returnedcolumn.cpp index f3b7f9a10..d78817f06 100644 --- a/dbcon/execplan/returnedcolumn.cpp +++ b/dbcon/execplan/returnedcolumn.cpp @@ -173,12 +173,11 @@ void ReturnedColumn::unserialize(messageqcpp::ByteStream& b) bool ReturnedColumn::operator==(const ReturnedColumn& t) const { + // Not all fields are considered for a positive equality. 
if (fData != t.fData) return false; if (fCardinality != t.fCardinality) return false; - //if (fAlias != t.fAlias) - // return false; if (fDistinct != t.fDistinct) return false; if (fJoinInfo != t.fJoinInfo) @@ -187,20 +186,14 @@ bool ReturnedColumn::operator==(const ReturnedColumn& t) const return false; if (fNullsFirst != t.fNullsFirst) return false; - //if (fOrderPos != t.fOrderPos) - // return false; if (fInputIndex != t.fInputIndex) return false; if (fOutputIndex != t.fOutputIndex) return false; - //if (fSequence != t.fSequence) - // return false; if (fResultType != t.fResultType) return false; if (fOperationType != t.fOperationType) return false; - //if (fExpressionId != t.fExpressionId) - // return false; return true; } diff --git a/dbcon/execplan/simplecolumn.cpp b/dbcon/execplan/simplecolumn.cpp index 895334fc9..34eb4c919 100644 --- a/dbcon/execplan/simplecolumn.cpp +++ b/dbcon/execplan/simplecolumn.cpp @@ -333,7 +333,6 @@ void SimpleColumn::serialize(messageqcpp::ByteStream& b) const b << fViewName; b << (uint32_t) fOid; b << fData; - //b << fAlias; b << fTableAlias; b << (uint32_t) fSequence; b << static_cast(fIsInfiniDB); @@ -350,7 +349,6 @@ void SimpleColumn::unserialize(messageqcpp::ByteStream& b) b >> fViewName; b >> (uint32_t&) fOid; b >> fData; - //b >> fAlias; b >> fTableAlias; b >> (uint32_t&) fSequence; b >> reinterpret_cast< ByteStream::doublebyte&>(fIsInfiniDB); @@ -370,16 +368,12 @@ bool SimpleColumn::operator==(const SimpleColumn& t) const return false; if (fColumnName != t.fColumnName) return false; -// if (fIndexName != t.fIndexName) -// return false; if (fViewName != t.fViewName) return false; if (fOid != t.fOid) return false; if (data() != t.data()) return false; -// if (fAlias != t.fAlias) -// return false; if (fTableAlias != t.fTableAlias) return false; if (fAsc != t.fAsc) From 7ec1ccac5e660b1ffb026fd66dfaf02aecd96cec Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 24 Jul 2018 10:16:26 -0500 Subject: [PATCH 093/123] MCOL-1472 Add 
switch to handle nested case --- dbcon/mysql/ha_calpont_execplan.cpp | 7 ++++++- dbcon/mysql/ha_calpont_impl_if.h | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 6113e7ca4..42d26108c 100755 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -1268,7 +1268,7 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) ifp->functype() == Item_func::ISNOTNULL_FUNC) { ReturnedColumn* rhs = NULL; - if (!gwip->rcWorkStack.empty()) + if (!gwip->rcWorkStack.empty() && !gwip->inCaseStmt) { rhs = gwip->rcWorkStack.top(); gwip->rcWorkStack.pop(); @@ -3267,7 +3267,12 @@ FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonS if (funcName == "case_searched" && (i < arg_offset)) { + // MCOL-1472 Nested CASE with an ISNULL predicate. We don't want the predicate + // to pull off of rcWorkStack, so we set this inCaseStmt flag to tell it + // not to. 
+ gwi.inCaseStmt = true; sptp.reset(buildParseTree((Item_func*)(item->arguments()[i]), gwi, nonSupport)); + gwi.inCaseStmt = false; if (!gwi.ptWorkStack.empty() && *gwi.ptWorkStack.top()->data() == sptp->data()) { gwi.ptWorkStack.pop(); diff --git a/dbcon/mysql/ha_calpont_impl_if.h b/dbcon/mysql/ha_calpont_impl_if.h index 9a4fd8bd7..ab7b81034 100644 --- a/dbcon/mysql/ha_calpont_impl_if.h +++ b/dbcon/mysql/ha_calpont_impl_if.h @@ -147,6 +147,9 @@ struct gp_walk_info int32_t recursionHWM; std::stack rcBookMarkStack; + // Kludge for MCOL-1472 + bool inCaseStmt; + gp_walk_info() : sessionid(0), fatalParseError(false), condPush(false), @@ -162,7 +165,8 @@ struct gp_walk_info lastSub(0), derivedTbCnt(0), recursionLevel(-1), - recursionHWM(0) + recursionHWM(0), + inCaseStmt(false) {} ~gp_walk_info() {} From 0e856ce9b0e8925938d1651c87bd571287df37fb Mon Sep 17 00:00:00 2001 From: drrtuy Date: Tue, 24 Jul 2018 23:05:09 +0300 Subject: [PATCH 094/123] MCOL-1551 CS now supports hostnames in Columnstore.xml. 
--- utils/messageqcpp/messagequeue.cpp | 81 +++++++++++++++++++------- utils/messageqcpp/messagequeue.h | 3 +- utils/messageqcpp/messagequeuepool.cpp | 6 +- utils/messageqcpp/messagequeuepool.h | 2 +- 4 files changed, 65 insertions(+), 27 deletions(-) diff --git a/utils/messageqcpp/messagequeue.cpp b/utils/messageqcpp/messagequeue.cpp index 4800faf13..085426d9c 100644 --- a/utils/messageqcpp/messagequeue.cpp +++ b/utils/messageqcpp/messagequeue.cpp @@ -152,26 +152,44 @@ void MessageQueueClient::shutdown() void MessageQueueClient::setup(bool syncProto) { - string otherEndIPStr; - string otherEndPortStr; - uint16_t port; + string otherEndIPStr; + string otherEndPortStr; + struct addrinfo hints, *servinfo; + int rc = 0; - otherEndIPStr = fConfig->getConfig(fOtherEnd, "IPAddr"); - otherEndPortStr = fConfig->getConfig(fOtherEnd, "Port"); + otherEndIPStr = fConfig->getConfig(fOtherEnd, "IPAddr"); + otherEndPortStr = fConfig->getConfig(fOtherEnd, "Port"); - if (otherEndIPStr.length() == 0) otherEndIPStr = "127.0.0.1"; + if (otherEndIPStr.length() == 0) otherEndIPStr = "127.0.0.1"; - if (otherEndPortStr.length() == 0 || (port = static_cast(strtol(otherEndPortStr.c_str(), 0, 0))) == 0) - { - string msg = "MessageQueueClient::MessageQueueClient: config error: Invalid/Missing Port attribute"; - throw runtime_error(msg); - } + if (otherEndPortStr.length() == 0 || static_cast(strtol(otherEndPortStr.c_str(), 0, 0)) == 0) + { + string msg = "MessageQueueClient::setup(): config error: Invalid/Missing Port attribute"; + throw runtime_error(msg); + } - memset(&fServ_addr, 0, sizeof(fServ_addr)); - sockaddr_in* sinp = reinterpret_cast(&fServ_addr); - sinp->sin_family = AF_INET; - sinp->sin_port = htons(port); - sinp->sin_addr.s_addr = inet_addr(otherEndIPStr.c_str()); + memset(&hints, 0, sizeof hints); + // ATM We support IPv4 only. 
+ hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + + + if( !(rc = getaddrinfo(otherEndIPStr.c_str(), otherEndPortStr.c_str(), &hints, &servinfo)) ) + { + memset(&fServ_addr, 0, sizeof(fServ_addr)); + sockaddr_in* sinp = reinterpret_cast(&fServ_addr); + *sinp = *reinterpret_cast(servinfo->ai_addr); + freeaddrinfo(servinfo); + } + else + { + string msg = "MessageQueueClient::setup(): "; + msg.append(gai_strerror(rc)); + logging::Message::Args args; + logging::LoggingID li(31); + args.add(msg); + fLogger.logMessage(logging::LOG_TYPE_ERROR, logging::M0000, args, li); + } #ifdef SKIP_IDB_COMPRESSION fClientSock.setSocketImpl(new InetStreamSocket()); @@ -197,15 +215,34 @@ MessageQueueClient::MessageQueueClient(const string& otherEnd, Config* config, b setup(syncProto); } -MessageQueueClient::MessageQueueClient(const string& ip, uint16_t port, bool syncProto) : +MessageQueueClient::MessageQueueClient(const string& dnOrIp, uint16_t port, bool syncProto) : fLogger(31), fIsAvailable(true) { - memset(&fServ_addr, 0, sizeof(fServ_addr)); - sockaddr_in* sinp = reinterpret_cast(&fServ_addr); - sinp->sin_family = AF_INET; - sinp->sin_port = htons(port); - sinp->sin_addr.s_addr = inet_addr(ip.c_str()); + struct addrinfo hints, *servinfo; + int rc = 0; + memset(&hints, 0, sizeof hints); + // ATM We support IPv4 only. 
+ hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + + if( !(rc = getaddrinfo(dnOrIp.c_str(), NULL, &hints, &servinfo)) ) + { + memset(&fServ_addr, 0, sizeof(fServ_addr)); + sockaddr_in* sinp = reinterpret_cast(&fServ_addr); + *sinp = *reinterpret_cast(servinfo->ai_addr); + sinp->sin_port = htons(port); + freeaddrinfo(servinfo); + } + else + { + string msg = "MessageQueueClient::MessageQueueClient(): "; + msg.append(gai_strerror(rc)); + logging::Message::Args args; + logging::LoggingID li(31); + args.add(msg); + fLogger.logMessage(logging::LOG_TYPE_ERROR, logging::M0000, args, li); + } #ifdef SKIP_IDB_COMPRESSION fClientSock.setSocketImpl(new InetStreamSocket()); #else diff --git a/utils/messageqcpp/messagequeue.h b/utils/messageqcpp/messagequeue.h index 8de4df398..e33e5cd84 100644 --- a/utils/messageqcpp/messagequeue.h +++ b/utils/messageqcpp/messagequeue.h @@ -33,6 +33,7 @@ #include #else #include +#include #endif #include "serversocket.h" @@ -182,7 +183,7 @@ public: * * construct a queue from this process to otherEnd on the given IP and Port. 
*/ - EXPORT explicit MessageQueueClient(const std::string& ip, uint16_t port, bool syncProto=true); + EXPORT explicit MessageQueueClient(const std::string& dnOrIp, uint16_t port, bool syncProto=true); /** diff --git a/utils/messageqcpp/messagequeuepool.cpp b/utils/messageqcpp/messagequeuepool.cpp index 5b8c9862c..27459991f 100644 --- a/utils/messageqcpp/messagequeuepool.cpp +++ b/utils/messageqcpp/messagequeuepool.cpp @@ -36,12 +36,12 @@ static uint64_t TimeSpecToSeconds(struct timespec* ts) return (uint64_t)ts->tv_sec + (uint64_t)ts->tv_nsec / 1000000000; } -MessageQueueClient *MessageQueueClientPool::getInstance(const std::string &ip, uint64_t port) +MessageQueueClient *MessageQueueClientPool::getInstance(const std::string &dnOrIp, uint64_t port) { boost::mutex::scoped_lock lock(queueMutex); std::ostringstream oss; - oss << ip << "_" << port; + oss << dnOrIp << "_" << port; std::string searchString = oss.str(); MessageQueueClient *returnClient = MessageQueueClientPool::findInPool(searchString); @@ -58,7 +58,7 @@ MessageQueueClient *MessageQueueClientPool::getInstance(const std::string &ip, u clock_gettime(CLOCK_MONOTONIC, &now); uint64_t nowSeconds = TimeSpecToSeconds(&now); - newClientObject->client = new MessageQueueClient(ip, port); + newClientObject->client = new MessageQueueClient(dnOrIp, port); newClientObject->inUse = true; newClientObject->lastUsed = nowSeconds; clientMap.insert(std::pair(searchString, newClientObject)); diff --git a/utils/messageqcpp/messagequeuepool.h b/utils/messageqcpp/messagequeuepool.h index fc5576203..227b13b2c 100644 --- a/utils/messageqcpp/messagequeuepool.h +++ b/utils/messageqcpp/messagequeuepool.h @@ -41,7 +41,7 @@ class MessageQueueClientPool { public: static MessageQueueClient *getInstance(const std::string &module); - static MessageQueueClient *getInstance(const std::string &ip, uint64_t port); + static MessageQueueClient *getInstance(const std::string &dnOrIp, uint64_t port); static void releaseInstance(MessageQueueClient 
* client); static void deleteInstance(MessageQueueClient * client); static MessageQueueClient *findInPool(const std::string &search); From 926314bf17cbe6848dcff76c63c6f60d4e3f3346 Mon Sep 17 00:00:00 2001 From: David Hill Date: Thu, 26 Jul 2018 14:30:14 -0500 Subject: [PATCH 095/123] MCOL-1523 - enhance to failover module when ddl/dmlproc crashes --- dbcon/execplan/clientrotator.cpp | 32 +++++++++++++++++++++++ procmgr/processmanager.cpp | 4 +-- procmon/main.cpp | 4 +-- procmon/processmonitor.cpp | 45 ++++++++++++++++++++------------ procmon/processmonitor.h | 2 +- 5 files changed, 65 insertions(+), 22 deletions(-) diff --git a/dbcon/execplan/clientrotator.cpp b/dbcon/execplan/clientrotator.cpp index 3fc45a7e5..f71299840 100644 --- a/dbcon/execplan/clientrotator.cpp +++ b/dbcon/execplan/clientrotator.cpp @@ -49,6 +49,15 @@ using namespace logging; #include "clientrotator.h" +//#include "idb_mysql.h" + +/** Debug macro */ +#ifdef INFINIDB_DEBUG +#define IDEBUG(x) {x;} +#else +#define IDEBUG(x) {} +#endif + #define LOG_TO_CERR namespace execplan @@ -60,13 +69,36 @@ const uint64_t LOCAL_EXEMGR_PORT = 8601; string ClientRotator::getModule() { string installDir = startup::StartUp::installDir(); + + //Log to debug.log + LoggingID logid( 24, 0, 0); + string fileName = installDir + "/local/module"; + string module; ifstream moduleFile (fileName.c_str()); if (moduleFile.is_open()) + { getline (moduleFile, module); + } + else + { + { + logging::Message::Args args1; + logging::Message msg(1); + std::ostringstream oss; + oss << "ClientRotator::getModule open status2 =" << strerror(errno); + args1.add(oss.str()); + args1.add(fileName); + msg.format( args1 ); + Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_DEBUG, msg, logid); + } + } + moduleFile.close(); + return module; } diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index a6ca886af..0b8f4af59 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -827,7 +827,7 @@ void 
processMSG(messageqcpp::IOSocket* cfIos) } if (opState == oam::MAN_OFFLINE || opState == oam::MAN_DISABLED - || opState == oam::AUTO_DISABLED ) { + || opState == oam::AUTO_DISABLED || opState == oam::AUTO_OFFLINE) { oam.dbrmctl("halt"); log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); @@ -848,7 +848,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) } else { - log.writeLog(__LINE__, "ERROR: module not stopped", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "ERROR: module not stopped, state = " + oam.itoa(opState), LOG_TYPE_ERROR); status = API_FAILURE; break; } diff --git a/procmon/main.cpp b/procmon/main.cpp index b4e23a6e1..d6edd4ac7 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -1395,7 +1395,7 @@ static void chldHandleThread(MonitorConfig config) {} // check if process failover is needed due to process outage - aMonitor.checkProcessFailover((*listPtr).ProcessName); + aMonitor.checkModuleFailover((*listPtr).ProcessName); //check the db health if (DBFunctionalMonitorFlag == "y" ) { @@ -1470,7 +1470,7 @@ static void chldHandleThread(MonitorConfig config) (*listPtr).processID = 0; // check if process failover is needed due to process outage - aMonitor.checkProcessFailover((*listPtr).ProcessName); + aMonitor.checkModuleFailover((*listPtr).ProcessName); break; } else diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 91f78e640..754d6ccf2 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -1174,7 +1174,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO // error in launching a process if ( requestStatus == oam::API_FAILURE && (*listPtr).RunType == SIMPLEX) - checkProcessFailover((*listPtr).ProcessName); + checkModuleFailover((*listPtr).ProcessName); else break; } @@ -4625,19 +4625,19 @@ std::string ProcessMonitor::sendMsgProcMon1( std::string module, ByteStream msg, } /****************************************************************************************** -* @brief 
checkProcessFailover +* @brief checkModuleFailover * -* purpose: check if process failover is needed due to a process outage +* purpose: check if module failover is needed due to a process outage * ******************************************************************************************/ -void ProcessMonitor::checkProcessFailover( std::string processName) +void ProcessMonitor::checkModuleFailover( std::string processName) { Oam oam; //force failover on certain processes if ( processName == "DDLProc" || processName == "DMLProc" ) { - log.writeLog(__LINE__, "checkProcessFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL); + log.writeLog(__LINE__, "checkModuleFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL); try { @@ -4656,26 +4656,37 @@ void ProcessMonitor::checkProcessFailover( std::string processName) systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_OFFLINE || systemprocessstatus.processstatus[i].ProcessOpState == oam::FAILED ) { // found a AVAILABLE mate, start it - log.writeLog(__LINE__, "start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "Change UM Master to module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "Disable local UM module " + config.moduleName(), LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "Stop local UM module " + config.moduleName(), LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "Disable Local will Enable UM module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG); - try { - oam.setSystemConfig("PrimaryUMModuleName", systemprocessstatus.processstatus[i].Module); - - //distribute config file - oam.distributeConfigFile("system"); - sleep(1); - } - catch(...) 
{} + oam::DeviceNetworkConfig devicenetworkconfig; + oam::DeviceNetworkList devicenetworklist; + + devicenetworkconfig.DeviceName = config.moduleName(); + devicenetworklist.push_back(devicenetworkconfig); try { - oam.startProcess(systemprocessstatus.processstatus[i].Module, processName, FORCEFUL, ACK_YES); - log.writeLog(__LINE__, "success start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG); + oam.stopModule(devicenetworklist, oam::FORCEFUL, oam::ACK_YES); + log.writeLog(__LINE__, "success stopModule on module " + config.moduleName(), LOG_TYPE_DEBUG); + + try + { + oam.disableModule(devicenetworklist); + log.writeLog(__LINE__, "success disableModule on module " + config.moduleName(), LOG_TYPE_DEBUG); + } + catch (exception& e) + { + log.writeLog(__LINE__, "failed disableModule on module " + config.moduleName(), LOG_TYPE_ERROR); + } } catch (exception& e) { - log.writeLog(__LINE__, "failed start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_ERROR); + log.writeLog(__LINE__, "failed stopModule on module " + config.moduleName(), LOG_TYPE_ERROR); } + break; } } diff --git a/procmon/processmonitor.h b/procmon/processmonitor.h index 5a3145963..8a4c3c8be 100644 --- a/procmon/processmonitor.h +++ b/procmon/processmonitor.h @@ -487,7 +487,7 @@ public: /** *@brief check if module failover is needed due to a process outage */ - void checkProcessFailover( std::string processName); + void checkModuleFailover(std::string processName); /** *@brief run upgrade script From d852e7a4879b88d5ebb88ab60d0dc8d724ce5299 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Sat, 28 Jul 2018 14:16:48 +0300 Subject: [PATCH 096/123] Uses correct .so file for UDAF regr_avgx creation statement. 
--- dbcon/mysql/install_calpont_mysql.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbcon/mysql/install_calpont_mysql.sh b/dbcon/mysql/install_calpont_mysql.sh index e04371549..259e2d182 100755 --- a/dbcon/mysql/install_calpont_mysql.sh +++ b/dbcon/mysql/install_calpont_mysql.sh @@ -84,7 +84,7 @@ CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.so'; CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemready RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemreadonly RETURNS INTEGER soname 'libcalmysql.so'; -CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libcalmysql.dll'; +CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libudf_mysql.so'; CREATE DATABASE IF NOT EXISTS infinidb_vtable; CREATE DATABASE IF NOT EXISTS infinidb_querystats; From a3862a42f79622e7c31959aba1e7cf946283c379 Mon Sep 17 00:00:00 2001 From: David Hill Date: Tue, 31 Jul 2018 15:51:32 -0500 Subject: [PATCH 097/123] um failover changes --- procmgr/processmanager.cpp | 80 ++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 29 deletions(-) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 0b8f4af59..0a054f9c3 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -373,7 +373,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) msg >> target; msg >> graceful; msg >> ackIndicator; - msg >> manualFlag; + msg >> manualFlag; switch (actionType) { case STOPMODULE: @@ -835,16 +835,18 @@ void processMSG(messageqcpp::IOSocket* cfIos) status = processManager.disableModule(moduleName, true); log.writeLog(__LINE__, "Disable Module Completed on " + moduleName, LOG_TYPE_INFO); + processManager.recycleProcess(moduleName); + + //check for SIMPLEX Processes on mate might need to be started + processManager.checkSimplexModule(moduleName); + //call dbrm control - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", 
LOG_TYPE_DEBUG); +// oam.dbrmctl("reload"); +// log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); // resume the dbrm oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - - //check for SIMPLEX Processes on mate might need to be started - processManager.checkSimplexModule(moduleName); } else { @@ -3299,7 +3301,9 @@ int ProcessManager::disableModule(string target, bool manualFlag) // update state to MAN_DISABLED if (opState == oam::AUTO_DISABLED && newState == oam::MAN_DISABLED) { - + //removemodule to get proess in MAN_OFFLINE + stopModule(target, REMOVE, true); + try { oam.getSystemConfig(target, moduleconfig); @@ -3351,7 +3355,7 @@ int ProcessManager::disableModule(string target, bool manualFlag) setModuleState(target, newState); - //set Columnstore.xml enbale state + //set Columnstore.xml enable state setEnableState( target, SnewState); log.writeLog(__LINE__, "disableModule - setEnableState", LOG_TYPE_DEBUG); @@ -3435,18 +3439,18 @@ void ProcessManager::recycleProcess(string module, bool enableModule) restartProcessType("PrimProc"); sleep(1); - restartProcessType("ExeMgr"); - sleep(1); - restartProcessType("mysqld"); restartProcessType("WriteEngineServer"); sleep(1); - restartProcessType("DDLProc",module); + startProcessType("ExeMgr"); sleep(1); - restartProcessType("DMLProc",module); + startProcessType("DDLProc"); + sleep(1); + + startProcessType("DMLProc"); return; } @@ -4156,7 +4160,8 @@ int ProcessManager::stopProcessType( std::string processName, bool manualFlag ) { if ( systemprocessstatus.processstatus[i].ProcessName == processName) { //skip if in a COLD_STANDBY state - if ( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY ) +// if ( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY ) + if ( systemprocessstatus.processstatus[i].ProcessOpState != oam::ACTIVE ) continue; // found one, request restart of it @@ -4286,12 +4291,17 @@ int 
ProcessManager::restartProcessType( std::string processName, std::string ski { if ( systemprocessstatus.processstatus[i].ProcessName == processName ) { //skip if in a BUSY_INIT state - if ( systemprocessstatus.processstatus[i].ProcessOpState == oam::BUSY_INIT || - systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_INIT || - systemprocessstatus.processstatus[i].ProcessOpState == oam::MAN_INIT || - ( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY && !manualFlag ) ) - continue; +// if ( systemprocessstatus.processstatus[i].ProcessOpState == oam::BUSY_INIT || +// systemprocessstatus.processstatus[i].ProcessOpState == oam::MAN_OFFLINE || +// systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_OFFLINE || +// systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_INIT || +// systemprocessstatus.processstatus[i].ProcessOpState == oam::MAN_INIT || +// ( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY && !manualFlag ) ) +// continue; + if ( systemprocessstatus.processstatus[i].ProcessOpState != oam::ACTIVE ) + continue; + if ( (processName.find("DDLProc") == 0 || processName.find("DMLProc") == 0) ) { string procModuleType = systemprocessstatus.processstatus[i].Module.substr(0,MAX_MODULE_TYPE_SIZE); @@ -7539,18 +7549,18 @@ void ProcessManager::checkSimplexModule(std::string moduleName) } if ( state == oam::COLD_STANDBY ) { - //set Primary UM Module - if ( systemprocessconfig.processconfig[j].ProcessName == "DDLProc" ) { + //process DDL/DMLProc + if ( systemprocessconfig.processconfig[j].ProcessName == "DDLProc") + { + setPMProcIPs((*pt).DeviceName); + + log.writeLog(__LINE__, "Set Primary UM Module = " + (*pt).DeviceName, LOG_TYPE_DEBUG); + oam.setSystemConfig("PrimaryUMModuleName", (*pt).DeviceName); //distribute config file distributeConfigFile("system"); sleep(2); - - //add MySQL Replication setup, if needed - log.writeLog(__LINE__, "Setup MySQL Replication for COLD_STANDBY DMLProc 
going ACTIVE", LOG_TYPE_DEBUG); - oam::DeviceNetworkList devicenetworklist; - processManager.setMySQLReplication(devicenetworklist, (*pt).DeviceName); } int status = processManager.startProcess((*pt).DeviceName, @@ -7559,12 +7569,24 @@ void ProcessManager::checkSimplexModule(std::string moduleName) if ( status == API_SUCCESS ) { log.writeLog(__LINE__, "checkSimplexModule: mate process started: " + (*pt).DeviceName + "/" + systemprocessconfig.processconfig[j].ProcessName, LOG_TYPE_DEBUG); - //check to see if DDL/DML IPs need to be updated - if ( systemprocessconfig.processconfig[j].ProcessName == "DDLProc" ) - setPMProcIPs((*pt).DeviceName); + status = processManager.startProcess((*pt).DeviceName, + "DMLProc", + FORCEFUL); + if ( status == API_SUCCESS ) { + log.writeLog(__LINE__, "checkSimplexModule: mate process started: " + (*pt).DeviceName + "/DMLProc", LOG_TYPE_DEBUG); + } + else + log.writeLog(__LINE__, "checkSimplexModule: mate process failed to start: " + (*pt).DeviceName + "/DMLProc", LOG_TYPE_DEBUG); } else log.writeLog(__LINE__, "checkSimplexModule: mate process failed to start: " + (*pt).DeviceName + "/" + systemprocessconfig.processconfig[j].ProcessName, LOG_TYPE_DEBUG); + + //setup new MariaDB Replication Master + if ( systemprocessconfig.processconfig[j].ProcessName == "DMLProc" ) { + log.writeLog(__LINE__, "Setup MySQL Replication for COLD_STANDBY DMLProc going ACTIVE", LOG_TYPE_DEBUG); + oam::DeviceNetworkList devicenetworklist; + processManager.setMySQLReplication(devicenetworklist, (*pt).DeviceName); + } } else { // if found ACTIVE, skip to next process From d86fabff653e7fbff234bd97f725be1da668f344 Mon Sep 17 00:00:00 2001 From: David Hill Date: Wed, 1 Aug 2018 10:17:13 -0500 Subject: [PATCH 098/123] MCOL-1145/1146 - fix nonroot install lib issue --- oamapps/postConfigure/installer.cpp | 10 +++- oamapps/postConfigure/postConfigure.cpp | 57 +++++++++++-------- .../postConfigure/quick_installer_amazon.sh | 4 +- .../quick_installer_multi_server.sh | 4 
+- .../quick_installer_single_server.sh | 2 +- 5 files changed, 45 insertions(+), 32 deletions(-) diff --git a/oamapps/postConfigure/installer.cpp b/oamapps/postConfigure/installer.cpp index 1cc84ae0f..17c312c27 100644 --- a/oamapps/postConfigure/installer.cpp +++ b/oamapps/postConfigure/installer.cpp @@ -813,7 +813,10 @@ int main(int argc, char *argv[]) cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". /etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; @@ -829,7 +832,10 @@ int main(int argc, char *argv[]) cout << endl << "ERROR: MariaDB ColumnStore Process failed to start, check log files in /var/log/mariadb/columnstore" << endl; cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". 
/etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 98227da9d..e0df9761a 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -1247,26 +1247,16 @@ int main(int argc, char *argv[]) //amazon install setup check bool amazonInstall = false; string cloud = oam::UnassignedName; - system("aws --version > /tmp/amazon.log 2>&1"); - - ifstream in("/tmp/amazon.log"); - - in.seekg(0, std::ios::end); - int size = in.tellg(); - if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not found")) + + if (!multi_server_quick_install) { - // not running on amazon with ec2-api-tools - if (amazon_quick_install) - { - cout << "ERROR: Amazon Quick Installer was specified, bu the AMazon CLI API packages isnt installed, exiting" << endl; - exit(1); - } + system("aws --version > /tmp/amazon.log 2>&1"); - amazonInstall = false; - } - else - { - if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not installed")) + ifstream in("/tmp/amazon.log"); + + in.seekg(0, std::ios::end); + int size = in.tellg(); + if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not found")) { // not running on amazon with ec2-api-tools if (amazon_quick_install) @@ -1278,9 +1268,23 @@ int main(int argc, char *argv[]) amazonInstall = false; } else - amazonInstall = true; - } + { + if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not installed")) + { + // not running on amazon with ec2-api-tools + if (amazon_quick_install) + { + cout << "ERROR: Amazon Quick Installer was specified, bu the AMazon CLI API packages isnt installed, exiting" << endl; + exit(1); + } + amazonInstall = false; + } + else + amazonInstall = true; + } + } + try { cloud = sysConfig->getConfig(InstallSection, 
"Cloud"); } @@ -3641,9 +3645,6 @@ int main(int argc, char *argv[]) } //set mysql replication, if wasn't setup before on system -// if ( ( mysqlRep && pmwithum ) || -// ( mysqlRep && (umNumber > 1) ) || -// ( mysqlRep && (pmNumber > 1) && (IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM) ) ) if ( mysqlRep ) { cout << endl << "Run MariaDB ColumnStore Replication Setup.. "; @@ -3665,7 +3666,10 @@ int main(int argc, char *argv[]) cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". /etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; @@ -3682,7 +3686,10 @@ int main(int argc, char *argv[]) cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". /etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; diff --git a/oamapps/postConfigure/quick_installer_amazon.sh b/oamapps/postConfigure/quick_installer_amazon.sh index c71dc8920..4a1df903c 100755 --- a/oamapps/postConfigure/quick_installer_amazon.sh +++ b/oamapps/postConfigure/quick_installer_amazon.sh @@ -74,8 +74,8 @@ else echo "${bold}Run postConfigure script${normal}" echo "" if [[ $umCount = "" ]]; then - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qa -pm-count $pmCount $systemName + . 
/etc/profile.d/columnstoreEnv.sh;$HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qa -pm-count $pmCount $systemName else - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qa -pm-count $pmCount -um-count $umCount $systemName + . /etc/profile.d/columnstoreEnv.sh;$HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qa -pm-count $pmCount -um-count $umCount $systemName fi fi diff --git a/oamapps/postConfigure/quick_installer_multi_server.sh b/oamapps/postConfigure/quick_installer_multi_server.sh index 25d615d7e..dbb603220 100755 --- a/oamapps/postConfigure/quick_installer_multi_server.sh +++ b/oamapps/postConfigure/quick_installer_multi_server.sh @@ -78,8 +78,8 @@ else echo "${bold}Run postConfigure script${normal}" echo "" if [[ $umIpAddrs = "" ]]; then - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs $nonDistrubutedInstall $systemName + . /etc/profile.d/columnstoreEnv.sh;$HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs $nonDistrubutedInstall $systemName else - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs -um-ip-addrs $umIpAddrs $nonDistrubutedInstall $systemName + . 
/etc/profile.d/columnstoreEnv.sh;$HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs -um-ip-addrs $umIpAddrs $nonDistrubutedInstall $systemName fi fi diff --git a/oamapps/postConfigure/quick_installer_single_server.sh b/oamapps/postConfigure/quick_installer_single_server.sh index 2603bcec8..432b395c4 100755 --- a/oamapps/postConfigure/quick_installer_single_server.sh +++ b/oamapps/postConfigure/quick_installer_single_server.sh @@ -30,5 +30,5 @@ else $HOME/mariadb/columnstore/bin/post-install --installdir=$HOME/mariadb/columnstore echo "Run postConfigure script" echo "" - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qs + . /etc/profile.d/columnstoreEnv.sh; $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qs fi From 8043674432a0e0c5f4ca4af4acc2cac0ecee561b Mon Sep 17 00:00:00 2001 From: David Hill Date: Wed, 1 Aug 2018 16:34:40 -0500 Subject: [PATCH 099/123] MCOL-1591 - add umask test to tester --- .../clusterTester/columnstoreClusterTester.sh | 156 +++++++++++++++--- utils/clusterTester/os_detect.sh | 2 +- 2 files changed, 132 insertions(+), 26 deletions(-) diff --git a/utils/clusterTester/columnstoreClusterTester.sh b/utils/clusterTester/columnstoreClusterTester.sh index 9f8f3e9a3..f68d16c55 100755 --- a/utils/clusterTester/columnstoreClusterTester.sh +++ b/utils/clusterTester/columnstoreClusterTester.sh @@ -10,7 +10,7 @@ CHECK=true REPORTPASS=true LOGFILE="" -OS_LIST=("centos6" "centos7" "debian8" "debian9" "suse12" "ubuntu16") +OS_LIST=("centos6" "centos7" "debian8" "debian9" "suse12" "ubuntu16" "ubuntu18") NODE_IPADDRESS="" @@ -37,7 +37,7 @@ checkContinue() { } ### -# Print Fucntions +# Print Functions ### helpPrint () { @@ -57,7 +57,7 @@ helpPrint () { echo "" echo "Additional information on Tool is documented at:" echo "" - echo "https://mariadb.com/kb/en/mariadb/*****/" + echo "https://mariadb.com/kb/en/library/mariadb-columnstore-cluster-test-tool/" echo "" 
echo "Items that are checked:" echo " Node Ping test" @@ -65,6 +65,7 @@ helpPrint () { echo " ColumnStore Port test" echo " OS version" echo " Locale settings" + echo " Umask settings" echo " Firewall settings" echo " Date/time settings" echo " Dependent packages installed" @@ -326,16 +327,18 @@ checkSSH() rc="$?" if [ $rc -eq 0 ] || ( [ $rc -eq 2 ] && [ $OS == "suse12" ] ) ; then if [ $PASSWORD == "ssh" ] ; then - echo $ipadd " Node Passed SSH login test using ssh-keys" + echo $ipadd " Node Passed SSH login test using ssh-keys" else - echo $ipadd " Node Passed SSH login test using user password" + echo $ipadd " Node Passed SSH login test using user password" fi else if [ $PASSWORD == "ssh" ] ; then - echo $ipadd " Node ${bold}Failed${normal} SSH login test using ssh-keys" + echo $ipadd " Node ${bold}Failed${normal} SSH login test using ssh-keys" else - echo $ipadd " Node ${bold}Failed${normal} SSH login test using user password" + echo $ipadd " Node ${bold}Failed${normal} SSH login test using user password" fi + + echo "Error - Fix the SSH login issue and rerun test" exit 1 fi done @@ -489,12 +492,47 @@ checkLocale() fi } -checkSELINUX() +checkLocalUMASK() +{ + # UMASK check + # + echo "" + echo "** Run Local UMASK check" + echo "" + + pass=true + filename=UMASKtest + + rm -f $filename + touch $filename + permission=$(stat -c "%A" "$filename") + result=${permission:4:1} + if [ ${result} == "r" ] ; then + result=${permission:7:1} + if [ ${result} == "r" ] ; then + echo "UMASK local setting test passed" + else + echo "${bold}Warning${normal}, UMASK test failed, check local UMASK setting. Requirement is set to 0022" + pass=false + fi + else + echo "${bold}Warning${normal}, UMASK test failed, check local UMASK setting. Requirement is set to 0022" + pass=false + fi + + if ! 
$pass; then + checkContinue + fi + + rm -f $filename +} + +checkLocalSELINUX() { # SELINUX check # echo "" - echo "** Run SELINUX check" + echo "** Run Local SELINUX check" echo "" pass=true @@ -511,21 +549,86 @@ checkSELINUX() echo "Local Node SELINUX setting is Not Enabled" fi - for ipadd in "${NODE_IPADDRESS[@]}"; do - `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd $PASSWORD /etc/selinux/config > /tmp/remote_scp_get_check 2>&1` - if [ "$?" -ne 0 ]; then - echo "$ipadd Node SELINUX setting is Not Enabled" - else - `cat config | grep SELINUX | grep enforcing > /tmp/selinux_check 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Warning${normal}, $ipadd SELINUX setting is Enabled, check port test results" - pass=false - else - echo "$ipadd Node SELINUX setting is Not Enabled" - fi - `rm -f config` - fi - done + if ! $pass; then + checkContinue + fi +} + +checkUMASK() +{ + # UMASK check + # + echo "" + echo "** Run UMASK check" + echo "" + + pass=true + + for ipadd in "${NODE_IPADDRESS[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD 'rm -f UMASKtest;touch UMASKtest;echo $(stat -c "%A" "UMASKtest") > test.log' > /tmp/remote_command_check 2>&1` + if [ "$?" -eq 0 ]; then + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd Calpont1 test.log >> /tmp/remote_scp_get 2>&1` + if [ "$?" -eq 0 ]; then + permission=`cat test.log` + result=${permission:4:1} + if [ ${result} == "r" ] ; then + result=${permission:7:1} + if [ ${result} == "r" ] ; then + echo "$ipadd Node UMASK setting test passed" + else + echo "${bold}Warning${normal}, $ipadd Node UMASK test failed, check UMASK setting. Requirement is set to 0022" + pass=false + fi + else + echo "${bold}Warning${normal}, $ipadd Node UMASK test failed, check UMASK setting. 
Requirement is set to 0022" + pass=false + fi + else + echo "${bold}Warning${normal}, $ipadd UMASK test failed, remote_scp_get.sh error, check /tmp/remote_scp_get" + pass=false + fi + else + echo "${bold}Warning${normal}, $ipadd UMASK test failed, remote_command.sh error, check /tmp/remote_command_check" + pass=false + fi + `rm -f test.log` + done + + if ! $pass; then + checkContinue + fi + + rm -f $filename +} + +checkSELINUX() +{ + # SELINUX check + # + echo "" + echo "** Run SELINUX check" + echo "" + + pass=true + for ipadd in "${NODE_IPADDRESS[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd $PASSWORD /etc/selinux/config > /tmp/remote_scp_get_check 2>&1` + if [ "$?" -ne 0 ]; then + echo "$ipadd Node SELINUX setting is Not Enabled" + else + `cat config | grep SELINUX | grep enforcing > /tmp/selinux_check 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Warning${normal}, $ipadd SELINUX setting is Enabled, check port test results" + pass=false + else + echo "$ipadd Node SELINUX setting is Not Enabled" + fi + `rm -f config` + fi + done + + if ! $pass; then + checkContinue + fi } checkFirewalls() @@ -949,7 +1052,7 @@ checkPackages() declare -a UBUNTU_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "sudo" "libreadline-dev" "rsync" "libsnappy1V5" "net-tools" "libnuma1" ) declare -a UBUNTU_PKG_NOT=("mariadb-server" "libmariadb18") - if [ "$OS" == "ubuntu16" ] ; then + if [ "$OS" == "ubuntu16" ] || [ "$OS" == "ubuntu18" ]; then if [ ! 
`which dpkg 2>/dev/null` ] ; then echo "${bold}Failed${normal}, Local Node ${bold}rpm${normal} package not installed" pass=false @@ -1307,12 +1410,15 @@ echo "" checkLocalOS checkLocalDir +checkLocalUMASK +checkLocalSELINUX if [ "$IPADDRESSES" != "" ]; then checkPing checkSSH checkRemoteDir checkOS checkLocale + checkUMASK checkSELINUX checkFirewalls checkPorts diff --git a/utils/clusterTester/os_detect.sh b/utils/clusterTester/os_detect.sh index 7930c0daf..be69e870e 100755 --- a/utils/clusterTester/os_detect.sh +++ b/utils/clusterTester/os_detect.sh @@ -29,7 +29,7 @@ detectOS () { echo Operating System name: $osPrettyName echo Operating System tag: $osTag case "$osTag" in - centos6|centos7|ubuntu16|debian8|suse12|debian9) + centos6|centos7|ubuntu16|debian8|suse12|debian9|ubuntu18) ;; *) echo OS not supported From 443a2867c41fe37c3393235bd853733c74a8027f Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 2 Aug 2018 14:52:15 +0100 Subject: [PATCH 100/123] MCOL-1385 Initial 10.3 support --- dbcon/execplan/treenode.h | 8 +- dbcon/mysql/CMakeLists.txt | 5 + dbcon/mysql/ha_calpont.cpp | 32 ++ dbcon/mysql/ha_calpont_ddl.cpp | 10 +- dbcon/mysql/ha_calpont_dml.cpp | 4 +- dbcon/mysql/ha_calpont_execplan.cpp | 514 +++++++++++++------------ dbcon/mysql/ha_calpont_impl.cpp | 52 +-- dbcon/mysql/ha_calpont_partition.cpp | 24 +- dbcon/mysql/ha_pseudocolumn.cpp | 2 +- dbcon/mysql/ha_view.cpp | 14 +- dbcon/mysql/ha_window_function.cpp | 2 +- dbcon/mysql/idb_mysql.h | 1 + dbcon/mysql/is_columnstore_columns.cpp | 4 +- dbcon/mysql/is_columnstore_extents.cpp | 6 +- dbcon/mysql/is_columnstore_files.cpp | 6 +- dbcon/mysql/is_columnstore_tables.cpp | 4 +- dbcon/mysql/sm.cpp | 1 + oam/install_scripts/post-install | 1 - procmon/main.cpp | 7 - procmon/processmonitor.cpp | 8 - utils/common/simpleallocator.h | 4 - utils/funcexp/funcexp.cpp | 2 + utils/funcexp/funcexpwrapper.cpp | 9 +- utils/funcexp/functor.h | 4 +- utils/funcexp/functor_str.h | 3 +- utils/rowgroup/rowgroup.h | 5 +- 26 
files changed, 384 insertions(+), 348 deletions(-) diff --git a/dbcon/execplan/treenode.h b/dbcon/execplan/treenode.h index cef9579e9..d43239563 100644 --- a/dbcon/execplan/treenode.h +++ b/dbcon/execplan/treenode.h @@ -39,6 +39,10 @@ #include "calpontsystemcatalog.h" #include "exceptclasses.h" #include "dataconvert.h" + +// Workaround for my_global.h #define of isnan(X) causing a std::std namespace +using namespace std; + namespace messageqcpp { class ByteStream; @@ -594,7 +598,7 @@ inline const std::string& TreeNode::getStrVal() int exponent = (int)floor(log10( fabs(fResult.floatVal))); // This will round down the exponent double base = fResult.floatVal * pow(10, -1.0 * exponent); - if (std::isnan(exponent) || std::isnan(base)) + if (isnan(exponent) || isnan(base)) { snprintf(tmp, 312, "%f", fResult.floatVal); fResult.strVal = removeTrailing0(tmp, 312); @@ -629,7 +633,7 @@ inline const std::string& TreeNode::getStrVal() int exponent = (int)floor(log10( fabs(fResult.doubleVal))); // This will round down the exponent double base = fResult.doubleVal * pow(10, -1.0 * exponent); - if (std::isnan(exponent) || std::isnan(base)) + if (isnan(exponent) || isnan(base)) { snprintf(tmp, 312, "%f", fResult.doubleVal); fResult.strVal = removeTrailing0(tmp, 312); diff --git a/dbcon/mysql/CMakeLists.txt b/dbcon/mysql/CMakeLists.txt index 104e6bf6e..ae8f30622 100644 --- a/dbcon/mysql/CMakeLists.txt +++ b/dbcon/mysql/CMakeLists.txt @@ -27,10 +27,12 @@ add_library(calmysql SHARED ${libcalmysql_SRCS}) target_link_libraries(calmysql ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS} ${NETSNMP_LIBRARIES} ${SERVER_BUILD_INCLUDE_DIR}/../libservices/libmysqlservices.a threadpool) +SET_TARGET_PROPERTIES(calmysql PROPERTIES LINK_FLAGS "${calmysql_link_flags} -Wl,-E") set_target_properties(calmysql PROPERTIES VERSION 1.0.0 SOVERSION 1) SET ( is_columnstore_tables_SRCS is_columnstore_tables.cpp + sm.cpp ) add_library(is_columnstore_tables SHARED ${is_columnstore_tables_SRCS}) @@ -42,6 +44,7 @@ 
set_target_properties(is_columnstore_tables PROPERTIES VERSION 1.0.0 SOVERSION 1 SET ( is_columnstore_columns_SRCS is_columnstore_columns.cpp + sm.cpp ) add_library(is_columnstore_columns SHARED ${is_columnstore_columns_SRCS}) @@ -53,6 +56,7 @@ set_target_properties(is_columnstore_columns PROPERTIES VERSION 1.0.0 SOVERSION SET ( is_columnstore_extents_SRCS is_columnstore_extents.cpp + sm.cpp ) add_library(is_columnstore_extents SHARED ${is_columnstore_extents_SRCS}) @@ -64,6 +68,7 @@ set_target_properties(is_columnstore_extents PROPERTIES VERSION 1.0.0 SOVERSION SET ( is_columnstore_files_SRCS is_columnstore_files.cpp + sm.cpp ) add_library(is_columnstore_files SHARED ${is_columnstore_files_SRCS}) diff --git a/dbcon/mysql/ha_calpont.cpp b/dbcon/mysql/ha_calpont.cpp index 8d3996fe2..6167054c5 100644 --- a/dbcon/mysql/ha_calpont.cpp +++ b/dbcon/mysql/ha_calpont.cpp @@ -1299,4 +1299,36 @@ mysql_declare_plugin(columnstore) 0 /* config flags */ } mysql_declare_plugin_end; +maria_declare_plugin(columnstore) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &columnstore_storage_engine, + "Columnstore", + "MariaDB", + "Columnstore storage engine", + PLUGIN_LICENSE_GPL, + columnstore_init_func, + columnstore_done_func, + 0x0100, /* 1.0 */ + NULL, /* status variables */ + calpont_system_variables, /* system variables */ + "1.0", /* string version */ + MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ +}, +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &infinidb_storage_engine, + "InfiniDB", + "MariaDB", + "Columnstore storage engine (deprecated: use columnstore)", + PLUGIN_LICENSE_GPL, + infinidb_init_func, + infinidb_done_func, + 0x0100, /* 1.0 */ + NULL, /* status variables */ + calpont_system_variables, /* system variables */ + "1.0", /* string version */ + MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ +} +maria_declare_plugin_end; diff --git a/dbcon/mysql/ha_calpont_ddl.cpp b/dbcon/mysql/ha_calpont_ddl.cpp index 4d5e0508d..2d7c52563 100644 --- a/dbcon/mysql/ha_calpont_ddl.cpp +++ 
b/dbcon/mysql/ha_calpont_ddl.cpp @@ -2045,7 +2045,7 @@ int ha_calpont_impl_create_(const char* name, TABLE* table_arg, HA_CREATE_INFO* } // @bug 3908. error out primary key for now. - if (table_arg->key_info && table_arg->key_info->name && string(table_arg->key_info->name) == "PRIMARY") + if (table_arg->key_info && table_arg->key_info->name.length && string(table_arg->key_info->name.str) == "PRIMARY") { string emsg = logging::IDBErrorInfo::instance()->errorMsg(ERR_CONSTRAINTS); setError(thd, ER_CHECK_NOT_IMPLEMENTED, emsg); @@ -2214,8 +2214,8 @@ int ha_calpont_impl_rename_table_(const char* from, const char* to, cal_connecti stmt = "alter table `" + fromPair.second + "` rename to `" + toPair.second + "`;"; string db; - if ( thd->db ) - db = thd->db; + if ( thd->db.length ) + db = thd->db.str; else if ( fromPair.first.length() != 0 ) db = fromPair.first; else @@ -2245,8 +2245,8 @@ extern "C" THD* thd = current_thd; string db(""); - if ( thd->db ) - db = thd->db; + if ( thd->db.length ) + db = thd->db.str; int compressiontype = thd->variables.infinidb_compression_type; diff --git a/dbcon/mysql/ha_calpont_dml.cpp b/dbcon/mysql/ha_calpont_dml.cpp index 67aab9721..f2b52a9d9 100644 --- a/dbcon/mysql/ha_calpont_dml.cpp +++ b/dbcon/mysql/ha_calpont_dml.cpp @@ -121,7 +121,7 @@ int buildBuffer(uchar* buf, string& buffer, int& columns, TABLE* table) columns++; - cols.append((*field)->field_name); + cols.append((*field)->field_name.str); if (ptr == end_ptr) { @@ -236,7 +236,7 @@ uint32_t buildValueList (TABLE* table, cal_connection_info& ci ) } } - ci.colNameList.push_back((*field)->field_name); + ci.colNameList.push_back((*field)->field_name.str); columnPos++; } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 59eecb631..596fdb806 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -51,7 +51,6 @@ using namespace std; #include #include #include -using namespace boost; #include "errorids.h" 
using namespace logging; @@ -136,7 +135,7 @@ namespace { string lower(string str) { - algorithm::to_lower(str); + boost::algorithm::to_lower(str); return str; } } @@ -197,9 +196,9 @@ ReturnedColumn* findCorrespTempField(Item_ref* item, gp_walk_info& gwi, bool clo uint32_t i; for (i = 0; i < gwi.returnedCols.size(); i++) { - if (item->ref[0] && item->ref[0]->name && + if (item->ref[0] && item->ref[0]->name.length && gwi.returnedCols[i]->alias().c_str() && - !strcasecmp(item->ref[0]->name, gwi.returnedCols[i]->alias().c_str())) + !strcasecmp(item->ref[0]->name.str, gwi.returnedCols[i]->alias().c_str())) { if (clone) result = gwi.returnedCols[i]->clone(); @@ -224,13 +223,13 @@ string getViewName(TABLE_LIST* table_ptr) if (view) { if (!view->derived) - viewName = view->alias; + viewName = view->alias.str; while ((view = view->referencing_view)) { if (view->derived) continue; - viewName = view->alias + string(".") + viewName; + viewName = view->alias.str + string(".") + viewName; } } @@ -246,7 +245,7 @@ void debug_walk(const Item* item, void* arg) { Item_field* ifp = (Item_field*)item; cerr << "FIELD_ITEM: " << (ifp->db_name ? ifp->db_name : "") << '.' << bestTableName(ifp) << - '.' << ifp->field_name << endl; + '.' 
<< ifp->field_name.str << endl; break; } @@ -255,7 +254,7 @@ void debug_walk(const Item* item, void* arg) Item_int* iip = (Item_int*)item; cerr << "INT_ITEM: "; - if (iip->name) cerr << iip->name << " (from name string)" << endl; + if (iip->name.length) cerr << iip->name.str << " (from name string)" << endl; else cerr << iip->val_int() << endl; break; @@ -373,7 +372,7 @@ void debug_walk(const Item* item, void* arg) while ((item = it++)) { Field* equal_field = it.get_curr_field(); - cerr << equal_field->field_name << endl; + cerr << equal_field->field_name.str << endl; } break; @@ -525,12 +524,12 @@ void debug_walk(const Item* item, void* arg) case Item::SUM_FUNC_ITEM: { Item_sum* isp = (Item_sum*)item; - char* item_name = item->name; + char* item_name = const_cast(item->name.str); // MCOL-1052 This is an extended SELECT list item - if (!item_name && isp->get_arg_count() && isp->get_arg(0)->name) + if (!item_name && isp->get_arg_count() && isp->get_arg(0)->name.length) { - item_name = isp->get_arg(0)->name; + item_name = const_cast(isp->get_arg(0)->name.str); } else if (!item_name && isp->get_arg_count() && isp->get_arg(0)->type() == Item::INT_ITEM) @@ -634,7 +633,7 @@ void debug_walk(const Item* item, void* arg) // could be used on alias. // could also be used to tell correlated join (equal level). cerr << "CACHED REF FIELD_ITEM: " << ifp->db_name << '.' << bestTableName(ifp) << - '.' << ifp->field_name << endl; + '.' << ifp->field_name.str << endl; break; } else if (field->type() == Item::FUNC_ITEM) @@ -685,7 +684,7 @@ void debug_walk(const Item* item, void* arg) realType += '.'; realType += bestTableName(ifp); realType += '.'; - realType += ifp->field_name; + realType += ifp->field_name.str; break; } @@ -734,14 +733,14 @@ void debug_walk(const Item* item, void* arg) // MCOL-1052 The field referenced presumable came from // extended SELECT list. 
- if ( !ifp->field_name ) + if ( !ifp->field_name.str ) { - cerr << "REF extra FIELD_ITEM: " << ifp->name << endl; + cerr << "REF extra FIELD_ITEM: " << ifp->name.str << endl; } else { cerr << "REF FIELD_ITEM: " << ifp->db_name << '.' << bestTableName(ifp) << '.' << - ifp->field_name << endl; + ifp->field_name.str << endl; } break; @@ -826,7 +825,7 @@ void debug_walk(const Item* item, void* arg) // could be used on alias. // could also be used to tell correlated join (equal level). cerr << "CACHED FIELD_ITEM: " << ifp->db_name << '.' << bestTableName(ifp) << - '.' << ifp->field_name << endl; + '.' << ifp->field_name.str << endl; break; } else if (field->type() == Item::REF_ITEM) @@ -872,7 +871,7 @@ void debug_walk(const Item* item, void* arg) realType += '.'; realType += bestTableName(ifp); realType += '.'; - realType += ifp->field_name; + realType += ifp->field_name.str; break; } @@ -973,9 +972,9 @@ void buildNestedTableOuterJoin(gp_walk_info& gwi, TABLE_LIST* table_ptr) if (table->outer_join) { CalpontSystemCatalog::TableAliasName ta = make_aliasview( - (table->db ? table->db : ""), - (table->table_name ? table->table_name : ""), - (table->alias ? table->alias : ""), + (table->db.length ? table->db.str : ""), + (table->table_name.length ? table->table_name.str : ""), + (table->alias.length ? table->alias.str : ""), getViewName(table)); gwi.innerTables.insert(ta); } @@ -988,9 +987,9 @@ void buildNestedTableOuterJoin(gp_walk_info& gwi, TABLE_LIST* table_ptr) while ((tab = li++)) { CalpontSystemCatalog::TableAliasName ta = make_aliasview( - (tab->db ? tab->db : ""), - (tab->table_name ? tab->table_name : ""), - (tab->alias ? tab->alias : ""), + (tab->db.length ? tab->db.str : ""), + (tab->table_name.length ? tab->table_name.str : ""), + (tab->alias.length ? 
tab->alias.str : ""), getViewName(tab)); gwi.innerTables.insert(ta); } @@ -1037,9 +1036,9 @@ uint32_t buildOuterJoin(gp_walk_info& gwi, SELECT_LEX& select_lex) continue; CalpontSystemCatalog:: TableAliasName tan = make_aliasview( - (table_ptr->db ? table_ptr->db : ""), - (table_ptr->table_name ? table_ptr->table_name : ""), - (table_ptr->alias ? table_ptr->alias : ""), + (table_ptr->db.length ? table_ptr->db.str : ""), + (table_ptr->table_name.length ? table_ptr->table_name.str : ""), + (table_ptr->alias.length ? table_ptr->alias.str : ""), getViewName(table_ptr)); if (table_ptr->outer_join && table_ptr->on_expr) @@ -1055,9 +1054,9 @@ uint32_t buildOuterJoin(gp_walk_info& gwi, SELECT_LEX& select_lex) while ((table = li++)) { CalpontSystemCatalog::TableAliasName ta = make_aliasview( - (table->db ? table->db : ""), - (table->table_name ? table->table_name : ""), - (table->alias ? table->alias : ""), + (table->db.length ? table->db.str : ""), + (table->table_name.length ? table->table_name.str : ""), + (table->alias.length ? table->alias.str : ""), getViewName(table)); gwi_outer.innerTables.insert(ta); } @@ -1065,10 +1064,10 @@ uint32_t buildOuterJoin(gp_walk_info& gwi, SELECT_LEX& select_lex) #ifdef DEBUG_WALK_COND - if (table_ptr->alias) - cerr << table_ptr->alias ; - else if (table_ptr->alias) - cerr << table_ptr->alias; + if (table_ptr->alias.length) + cerr << table_ptr->alias.str; + else if (table_ptr->alias.length) + cerr << table_ptr->alias.str; cerr << " outer table expression: " << endl; expr->traverse_cond(debug_walk, &gwi_outer, Item::POSTFIX); @@ -1090,9 +1089,9 @@ uint32_t buildOuterJoin(gp_walk_info& gwi, SELECT_LEX& select_lex) while ((table = li++)) { CalpontSystemCatalog:: TableAliasName ta = make_aliasview( - (table->db ? table->db : ""), - (table->table_name ? table->table_name : ""), - (table->alias ? table->alias : ""), + (table->db.length ? table->db.str : ""), + (table->table_name.length ? table->table_name.str : ""), + (table->alias.length ? 
table->alias.str : ""), getViewName(table)); gwi_outer.innerTables.insert(ta); } @@ -1261,7 +1260,7 @@ bool buildRowColumnFilter(gp_walk_info* gwip, RowColumn* rhs, RowColumn* lhs, It logicOp = "or"; } - scoped_ptr lo(new LogicOperator(logicOp)); + boost::scoped_ptr lo(new LogicOperator(logicOp)); // 1st round. build the equivalent filters // two entries have been popped from the stack already: lhs and rhs @@ -2024,7 +2023,7 @@ SimpleColumn* buildSimpleColFromDerivedTable(gp_walk_info& gwi, Item_field* ifp) CalpontSystemCatalog::TableColName tcn = gwi.csc->colName(oidlist[j].objnum); CalpontSystemCatalog::ColType ct = gwi.csc->colType(oidlist[j].objnum); - if (strcasecmp(ifp->field_name, tcn.column.c_str()) == 0) + if (strcasecmp(ifp->field_name.str, tcn.column.c_str()) == 0) { // @bug4827. Remove the checking because outside tables could be the same // name as inner tables. This function is to identify column from a table, @@ -2046,7 +2045,7 @@ SimpleColumn* buildSimpleColFromDerivedTable(gp_walk_info& gwi, Item_field* ifp) sc->oid(oidlist[j].objnum); // @bug 3003. Keep column alias if it has. - sc->alias(ifp->is_autogenerated_name ? tcn.column : ifp->name); + sc->alias(ifp->is_autogenerated_name ? 
tcn.column : ifp->name.str); sc->tableAlias(lower(gwi.tbList[i].alias)); sc->viewName(lower(viewName)); @@ -2078,10 +2077,10 @@ SimpleColumn* buildSimpleColFromDerivedTable(gp_walk_info& gwi, Item_field* ifp) SimpleColumn* col = dynamic_cast(cols[j].get()); string alias = cols[j]->alias(); - if (strcasecmp(ifp->field_name, alias.c_str()) == 0 || + if (strcasecmp(ifp->field_name.str, alias.c_str()) == 0 || (col && alias.find(".") != string::npos && - (strcasecmp(ifp->field_name, col->columnName().c_str()) == 0 || - strcasecmp(ifp->field_name, (alias.substr(alias.find_last_of(".") + 1)).c_str()) == 0))) //@bug6066 + (strcasecmp(ifp->field_name.str, col->columnName().c_str()) == 0 || + strcasecmp(ifp->field_name.str, (alias.substr(alias.find_last_of(".") + 1)).c_str()) == 0))) //@bug6066 { // @bug4827. Remove the checking because outside tables could be the same // name as inner tables. This function is to identify column from a table, @@ -2103,7 +2102,7 @@ SimpleColumn* buildSimpleColFromDerivedTable(gp_walk_info& gwi, Item_field* ifp) sc->columnName(col->columnName()); // @bug 3003. Keep column alias if it has. - sc->alias(ifp->is_autogenerated_name ? cols[j]->alias() : ifp->name); + sc->alias(ifp->is_autogenerated_name ? 
cols[j]->alias() : ifp->name.str); sc->tableName(csep->derivedTbAlias()); sc->colPosition(j); string tableAlias(csep->derivedTbAlias()); @@ -2123,7 +2122,7 @@ SimpleColumn* buildSimpleColFromDerivedTable(gp_walk_info& gwi, Item_field* ifp) while (tblList) { - if (strcasecmp(tblList->alias, ifp->table_name) == 0) + if (strcasecmp(tblList->alias.str, ifp->table_name) == 0) { if (!tblList->outer_join) { @@ -2156,7 +2155,8 @@ SimpleColumn* buildSimpleColFromDerivedTable(gp_walk_info& gwi, Item_field* ifp) if (ifp->table_name) name += string(ifp->table_name) + "."; - name += ifp->name; + if (ifp->name.length) + name += ifp->name.str; args.add(name); gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_UNKNOWN_COL, args); } @@ -2417,7 +2417,7 @@ const string bestTableName(const Item_field* ifp) string field_table_table_name; if (ifp->cached_table) - field_table_table_name = ifp->cached_table->table_name; + field_table_table_name = ifp->cached_table->table_name.str; else if (ifp->field->table && ifp->field->table->s && ifp->field->table->s->table_name.str) field_table_table_name = ifp->field->table->s->table_name.str; @@ -2568,7 +2568,7 @@ SimpleColumn* getSmallestColumn(boost::shared_ptr csc, { // get the first column to project. @todo optimization to get the smallest one for foreign engine. 
Field* field = *(table->field); - SimpleColumn* sc = new SimpleColumn(table->s->db.str, table->s->table_name.str, field->field_name, tan.fIsInfiniDB, gwi.sessionid); + SimpleColumn* sc = new SimpleColumn(table->s->db.str, table->s->table_name.str, field->field_name.str, tan.fIsInfiniDB, gwi.sessionid); string alias(table->alias.ptr()); sc->tableAlias(lower(alias)); sc->isInfiniDB(false); @@ -3016,8 +3016,8 @@ ReturnedColumn* buildReturnedColumn(Item* item, gp_walk_info& gwi, bool& nonSupp } } - if (rc && item->name) - rc->alias(item->name); + if (rc && item->name.length) + rc->alias(item->name.str); return rc; } @@ -3041,8 +3041,8 @@ ArithmeticColumn* buildArithmeticColumn( ParseTree* lhs = 0, *rhs = 0; SRCP srcp; - if (item->name) - ac->alias(item->name); + if (item->name.length) + ac->alias(item->name.str); // argument_count() should generally be 2, except negate expression if (item->argument_count() == 2) @@ -3364,13 +3364,13 @@ ReturnedColumn* buildFunctionColumn( for (uint32_t i = 0; i < ifp->argument_count(); i++) { // group by clause try to see if the arguments are alias - if (gwi.clauseType == GROUP_BY && ifp->arguments()[i]->name) + if (gwi.clauseType == GROUP_BY && ifp->arguments()[i]->name.length) { uint32_t j = 0; for (; j < gwi.returnedCols.size(); j++) { - if (string (ifp->arguments()[i]->name) == gwi.returnedCols[j]->alias()) + if (string (ifp->arguments()[i]->name.str) == gwi.returnedCols[j]->alias()) { ReturnedColumn* rc = gwi.returnedCols[j]->clone(); rc->orderPos(j); @@ -3404,8 +3404,8 @@ ReturnedColumn* buildFunctionColumn( } // @bug 3039 - //if (isPredicateFunction(ifp->arguments()[i], &gwi) || ifp->arguments()[i]->has_subquery()) - if (ifp->arguments()[i]->has_subquery()) + //if (isPredicateFunction(ifp->arguments()[i], &gwi) || ifp->arguments()[i]->with_subquery()) + if (ifp->arguments()[i]->with_subquery()) { nonSupport = true; gwi.fatalParseError = true; @@ -3682,8 +3682,8 @@ ReturnedColumn* buildFunctionColumn( fc->resultType(ct); } - if 
(ifp->name) - fc->alias(ifp->name); + if (ifp->name.length) + fc->alias(ifp->name.str); // @3391. optimization. try to associate expression ID to the expression on the select list if (gwi.clauseType != SELECT) @@ -3725,8 +3725,14 @@ FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonS FuncExp* funcexp = FuncExp::instance(); string funcName = "case_simple"; - if (((Item_func_case*)item)->get_first_expr_num() == -1) + if (strcasecmp(((Item_func_case*)item)->case_type(), "searched") == 0) + { funcName = "case_searched"; + } +/* if (dynamic_cast(item)) + { + funcName = "case_searched"; + }*/ funcParms.reserve(item->argument_count()); // so buildXXXcolumn function will not pop stack. @@ -3910,7 +3916,7 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) bool isInformationSchema = false; // @bug5523 - if (ifp->cached_table && strcmp(ifp->cached_table->db, "information_schema") == 0) + if (ifp->cached_table && strcmp(ifp->cached_table->db.str, "information_schema") == 0) isInformationSchema = true; // support FRPM subquery. 
columns from the derived table has no definition @@ -3932,7 +3938,7 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) if (infiniDB) { ct = gwi.csc->colType( - gwi.csc->lookupOID(make_tcn(ifp->db_name, bestTableName(ifp), ifp->field_name))); + gwi.csc->lookupOID(make_tcn(ifp->db_name, bestTableName(ifp), ifp->field_name.str))); } else { @@ -3952,10 +3958,10 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) { case CalpontSystemCatalog::TINYINT: if (ct.scale == 0) - sc = new SimpleColumn_INT<1>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_INT<1>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); else { - sc = new SimpleColumn_Decimal<1>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_Decimal<1>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); ct.colDataType = CalpontSystemCatalog::DECIMAL; } @@ -3963,10 +3969,10 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) case CalpontSystemCatalog::SMALLINT: if (ct.scale == 0) - sc = new SimpleColumn_INT<2>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_INT<2>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); else { - sc = new SimpleColumn_Decimal<2>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_Decimal<2>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); ct.colDataType = CalpontSystemCatalog::DECIMAL; } @@ -3975,10 +3981,10 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) case CalpontSystemCatalog::INT: case CalpontSystemCatalog::MEDINT: if (ct.scale == 0) - sc = new SimpleColumn_INT<4>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_INT<4>(ifp->db_name, 
bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); else { - sc = new SimpleColumn_Decimal<4>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_Decimal<4>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); ct.colDataType = CalpontSystemCatalog::DECIMAL; } @@ -3986,34 +3992,34 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) case CalpontSystemCatalog::BIGINT: if (ct.scale == 0) - sc = new SimpleColumn_INT<8>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_INT<8>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); else { - sc = new SimpleColumn_Decimal<8>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_Decimal<8>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); ct.colDataType = CalpontSystemCatalog::DECIMAL; } break; case CalpontSystemCatalog::UTINYINT: - sc = new SimpleColumn_UINT<1>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_UINT<1>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); break; case CalpontSystemCatalog::USMALLINT: - sc = new SimpleColumn_UINT<2>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_UINT<2>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); break; case CalpontSystemCatalog::UINT: case CalpontSystemCatalog::UMEDINT: - sc = new SimpleColumn_UINT<4>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn_UINT<4>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); break; case CalpontSystemCatalog::UBIGINT: - sc = new SimpleColumn_UINT<8>(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new 
SimpleColumn_UINT<8>(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); break; default: - sc = new SimpleColumn(ifp->db_name, bestTableName(ifp), ifp->field_name, infiniDB, gwi.sessionid); + sc = new SimpleColumn(ifp->db_name, bestTableName(ifp), ifp->field_name.str, infiniDB, gwi.sessionid); } sc->resultType(ct); @@ -4030,7 +4036,7 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi) // view name sc->viewName(lower(getViewName(ifp->cached_table))); - sc->alias(ifp->name); + sc->alias(ifp->name.str); sc->isInfiniDB(infiniDB); if (!infiniDB && ifp->field) @@ -4138,8 +4144,8 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) ac = new AggregateColumn(gwi.sessionid); } - if (isp->name) - ac->alias(isp->name); + if (isp->name.length) + ac->alias(isp->name.str); if ((setAggOp(ac, isp))) { @@ -4257,7 +4263,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) } parm.reset(sc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), parm)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), parm)); TABLE_LIST* tmp = (ifp->cached_table ? 
ifp->cached_table : 0); gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); break; @@ -4366,8 +4372,8 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { Message::Args args; - if (item->name) - args.add(item->name); + if (item->name.length) + args.add(item->name.str); else args.add(""); @@ -4801,7 +4807,7 @@ void gp_walk(const Item* item, void* arg) gwip->scsp = scsp; gwip->funcName.clear(); - gwip->columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), scsp)); + gwip->columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), scsp)); //@bug4636 take where clause column as dummy projection column, but only on local column. // varbinary aggregate is not supported yet, so rule it out @@ -4897,7 +4903,7 @@ void gp_walk(const Item* item, void* arg) if (!gwip->condPush) { - if (ifp->has_subquery() || funcName == "") + if (ifp->with_subquery() || funcName == "") { buildSubselectFunc(ifp, gwip); return; @@ -5650,7 +5656,7 @@ void parse_item (Item* item, vector& field_vec, // item is a Item_cache_wrapper. Shouldn't get here. printf("EXPR_CACHE_ITEM in parse_item\n"); string parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_SUB_QUERY_TYPE); - setError(item->thd(), ER_CHECK_NOT_IMPLEMENTED, parseErrorText); + setError(gwi->thd, ER_CHECK_NOT_IMPLEMENTED, parseErrorText); break; } @@ -5775,7 +5781,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i while ((sj_nest = sj_list_it++)) { - cerr << sj_nest->db << "." << sj_nest->table_name << endl; + cerr << sj_nest->db.str << "." << sj_nest->table_name.str << endl; } #endif @@ -5788,7 +5794,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i for (; table_ptr; table_ptr = table_ptr->next_local) { // mysql put vtable here for from sub. 
we ignore it - if (string(table_ptr->table_name).find("$vtable") != string::npos) + if (string(table_ptr->table_name.str).find("$vtable") != string::npos) continue; // Until we handle recursive cte: @@ -5811,7 +5817,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i SELECT_LEX* select_cursor = table_ptr->derived->first_select(); FromSubQuery fromSub(gwi, select_cursor); - string alias(table_ptr->alias); + string alias(table_ptr->alias.str); fromSub.alias(lower(alias)); CalpontSystemCatalog::TableAliasName tn = make_aliasview("", "", alias, viewName); @@ -5834,7 +5840,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i else if (table_ptr->view) { View* view = new View(table_ptr->view->select_lex, &gwi); - CalpontSystemCatalog::TableAliasName tn = make_aliastable(table_ptr->db, table_ptr->table_name, table_ptr->alias); + CalpontSystemCatalog::TableAliasName tn = make_aliastable(table_ptr->db.str, table_ptr->table_name.str, table_ptr->alias.str); view->viewName(tn); gwi.viewList.push_back(view); view->transform(); @@ -5846,17 +5852,17 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i // trigger system catalog cache if (infiniDB) - csc->columnRIDs(make_table(table_ptr->db, table_ptr->table_name), true); + csc->columnRIDs(make_table(table_ptr->db.str, table_ptr->table_name.str), true); - string table_name = table_ptr->table_name; + string table_name = table_ptr->table_name.str; // @bug5523 - if (table_ptr->db && strcmp(table_ptr->db, "information_schema") == 0) - table_name = (table_ptr->schema_table_name ? table_ptr->schema_table_name : table_ptr->alias); + if (table_ptr->db.length && strcmp(table_ptr->db.str, "information_schema") == 0) + table_name = (table_ptr->schema_table_name.length ? 
table_ptr->schema_table_name.str : table_ptr->alias.str); - CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db, table_name, table_ptr->alias, viewName, infiniDB); + CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db.str, table_name, table_ptr->alias.str, viewName, infiniDB); gwi.tbList.push_back(tn); - CalpontSystemCatalog::TableAliasName tan = make_aliastable(table_ptr->db, table_name, table_ptr->alias, infiniDB); + CalpontSystemCatalog::TableAliasName tan = make_aliastable(table_ptr->db.str, table_name, table_ptr->alias.str, infiniDB); gwi.tableMap[tan] = make_pair(0, table_ptr); #ifdef DEBUG_WALK_COND cerr << tn << endl; @@ -5894,7 +5900,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i bool unionSel = false; - if (!isUnion && select_lex.master_unit()->is_union()) + if (!isUnion && select_lex.master_unit()->is_unit_op()) { gwi.thd->infinidb_vtable.isUnion = true; CalpontSelectExecutionPlan::SelectList unionVec; @@ -6162,7 +6168,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i while ((item = it++)) { - string itemAlias = (item->name ? item->name : ""); + string itemAlias = (item->name.length ? item->name.str : ""); // @bug 5916. Need to keep checking until getting concret item in case // of nested view. @@ -6181,7 +6187,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i Item_field* ifp = (Item_field*)item; SimpleColumn* sc = NULL; - if (ifp->field_name && string(ifp->field_name) == "*") + if (ifp->field_name.length && string(ifp->field_name.str) == "*") { collectAllCols(gwi, ifp); break; @@ -6217,13 +6223,13 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (ifp->is_autogenerated_name) gwi.selectCols.push_back("`" + escapeBackTick(fullname.c_str()) + "`" + " `" + - escapeBackTick(itemAlias.empty() ? ifp->name : itemAlias.c_str()) + "`"); + escapeBackTick(itemAlias.empty() ? 
ifp->name.str : itemAlias.c_str()) + "`"); else - gwi.selectCols.push_back("`" + escapeBackTick((itemAlias.empty() ? ifp->name : itemAlias.c_str())) + "`"); + gwi.selectCols.push_back("`" + escapeBackTick((itemAlias.empty() ? ifp->name.str : itemAlias.c_str())) + "`"); gwi.returnedCols.push_back(spsc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), spsc)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), spsc)); TABLE_LIST* tmp = 0; if (ifp->cached_table) @@ -6295,7 +6301,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i bool hasNonSupportItem = false; parse_item(ifp, tmpVec, hasNonSupportItem, parseInfo); - if (ifp->has_subquery() || + if (ifp->with_subquery() || string(ifp->func_name()) == string("") || ifp->functype() == Item_func::NOT_ALL_FUNC || parseInfo & SUB_BIT) @@ -6319,8 +6325,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i srcp.reset(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (ifp->name) - srcp->alias(ifp->name); + if (ifp->name.length) + srcp->alias(ifp->name.str); continue; } @@ -6335,7 +6341,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i redo = true; String str; ifp->print(&str, QT_INFINIDB_NO_QUOTE); - gwi.selectCols.push_back(string(str.c_ptr()) + " " + "`" + escapeBackTick(item->name) + "`"); + gwi.selectCols.push_back(string(str.c_ptr()) + " " + "`" + escapeBackTick(item->name.str) + "`"); } break; @@ -6354,8 +6360,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (sel_cols_in_create.length() != 0) sel_cols_in_create += ", "; - sel_cols_in_create += string(str.c_ptr()) + " `" + ifp->name + "`"; - gwi.selectCols.push_back("`" + escapeBackTick(ifp->name) + "`"); + sel_cols_in_create += string(str.c_ptr()) + " `" + ifp->name.str + "`"; + 
gwi.selectCols.push_back("`" + escapeBackTick(ifp->name.str) + "`"); } } else // InfiniDB Non support functions still go through post process for now @@ -6402,8 +6408,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i SRCP srcp(cc); - if (ifp->name) - cc->alias(ifp->name); + if (ifp->name.length) + cc->alias(ifp->name.str); gwi.returnedCols.push_back(srcp); @@ -6455,7 +6461,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i ifp->print(&funcStr, QT_INFINIDB); string valStr; valStr.assign(funcStr.ptr(), funcStr.length()); - gwi.selectCols.push_back(valStr + " `" + escapeBackTick(ifp->name) + "`"); + gwi.selectCols.push_back(valStr + " `" + escapeBackTick(ifp->name.str) + "`"); // clear the error set by buildFunctionColumn gwi.fatalParseError = false; gwi.parseErrorText = ""; @@ -6472,14 +6478,14 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i else { // do not push the dummy column (mysql added) to returnedCol - if (item->name && string(item->name) == "Not_used") + if (item->name.length && string(item->name.str) == "Not_used") continue; // @bug3509. Constant column is sent to ExeMgr now. 
SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); gwi.returnedCols.push_back(srcp); @@ -6506,8 +6512,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); Item_string* isp = reinterpret_cast(item); String val, *str = isp->val_str(&val); @@ -6534,8 +6540,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); Item_decimal* isp = reinterpret_cast(item); String val, *str = isp->val_str(&val); @@ -6563,8 +6569,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); string name = string("null `") + escapeBackTick(srcp->alias().c_str()) + string("`") ; @@ -6624,18 +6630,18 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (sub->get_select_lex()->get_table_list()) rc->viewName(lower(getViewName(sub->get_select_lex()->get_table_list()))); - if (sub->name) - rc->alias(sub->name); + if (sub->name.length) + rc->alias(sub->name.str); gwi.returnedCols.push_back(SRCP(rc)); String str; sub->get_select_lex()->print(gwi.thd, &str, QT_INFINIDB_NO_QUOTE); sel_cols_in_create += "(" + string(str.c_ptr()) + ")"; - if (sub->name) + if (sub->name.length) { - sel_cols_in_create += "`" + escapeBackTick(sub->name) + "`"; - gwi.selectCols.push_back(sub->name); + 
sel_cols_in_create += "`" + escapeBackTick(sub->name.str) + "`"; + gwi.selectCols.push_back(sub->name.str); } else { @@ -6785,8 +6791,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i { emsg = "un-recognized column"; - if (funcFieldVec[i]->name) - emsg += string(funcFieldVec[i]->name); + if (funcFieldVec[i]->name.length) + emsg += string(funcFieldVec[i]->name.str); } else { @@ -6819,7 +6825,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (j == gwi.returnedCols.size()) { gwi.returnedCols.push_back(srcp); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(funcFieldVec[i]->field_name), srcp)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(funcFieldVec[i]->field_name.str), srcp)); if (sel_cols_in_create.length() != 0) sel_cols_in_create += ", "; @@ -6915,7 +6921,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i for (; i < gwi.returnedCols.size(); i++) { - if (string(groupItem->name) == gwi.returnedCols[i]->alias()) + if (string(groupItem->name.str) == gwi.returnedCols[i]->alias()) { ReturnedColumn* rc = gwi.returnedCols[i]->clone(); rc->orderPos(i); @@ -6979,7 +6985,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } else { - if (ifp->name && string(ifp->name) == gwi.returnedCols[j].get()->alias()) + if (ifp->name.length && string(ifp->name.str) == gwi.returnedCols[j].get()->alias()) { rc = gwi.returnedCols[j].get()->clone(); rc->orderPos(j); @@ -7006,7 +7012,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } gwi.groupByCols.push_back(srcp); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), srcp)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), srcp)); } // @bug5638. 
The group by column is constant but not counter, alias has to match a column // on the select list @@ -7020,7 +7026,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i for (uint32_t j = 0; j < gwi.returnedCols.size(); j++) { - if (groupItem->name && string(groupItem->name) == gwi.returnedCols[j].get()->alias()) + if (groupItem->name.length && string(groupItem->name.str) == gwi.returnedCols[j].get()->alias()) { rc = gwi.returnedCols[j].get()->clone(); rc->orderPos(j); @@ -7038,7 +7044,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } else if ((*(groupcol->item))->type() == Item::SUBSELECT_ITEM) { - if (!groupcol->in_field_list || !groupItem->name) + if (!groupcol->in_field_list || !groupItem->name.length) { nonSupportItem = groupItem; } @@ -7048,7 +7054,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i for (; i < gwi.returnedCols.size(); i++) { - if (string(groupItem->name) == gwi.returnedCols[i]->alias()) + if (string(groupItem->name.str) == gwi.returnedCols[i]->alias()) { ReturnedColumn* rc = gwi.returnedCols[i]->clone(); rc->orderPos(i); @@ -7105,8 +7111,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i { Message::Args args; - if (nonSupportItem->name) - args.add("'" + string(nonSupportItem->name) + "'"); + if (nonSupportItem->name.length) + args.add("'" + string(nonSupportItem->name.str) + "'"); else args.add(""); @@ -7124,8 +7130,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i string select_query(gwi.thd->infinidb_vtable.select_vtable_query.c_ptr()); string lower_create_query(gwi.thd->infinidb_vtable.create_vtable_query.c_ptr()); string lower_select_query(gwi.thd->infinidb_vtable.select_vtable_query.c_ptr()); - algorithm::to_lower(lower_create_query); - algorithm::to_lower(lower_select_query); + boost::algorithm::to_lower(lower_create_query); + 
boost::algorithm::to_lower(lower_select_query); // check if window functions are in order by. InfiniDB process order by list if @@ -7238,9 +7244,9 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i else if (ord_item->type() == Item::FUNC_ITEM) { // @bug 2621. order by alias - if (!ord_item->is_autogenerated_name && ord_item->name) + if (!ord_item->is_autogenerated_name && ord_item->name.length) { - ord_cols += ord_item->name; + ord_cols += ord_item->name.str; continue; } @@ -7377,7 +7383,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i continue; } } - else if (ord_item->name && ord_item->type() == Item::FIELD_ITEM) + else if (ord_item->name.length && ord_item->type() == Item::FIELD_ITEM) { Item_field* field = reinterpret_cast(ord_item); ReturnedColumn* rc = buildSimpleColumn(field, gwi); @@ -7400,7 +7406,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } if (strcasecmp(fullname.c_str(), gwi.returnedCols[i]->alias().c_str()) == 0 || - strcasecmp(ord_item->name, gwi.returnedCols[i]->alias().c_str()) == 0) + strcasecmp(ord_item->name.str, gwi.returnedCols[i]->alias().c_str()) == 0) { ord_cols += string(" `") + escapeBackTick(gwi.returnedCols[i]->alias().c_str()) + '`'; addToSel = false; @@ -7491,7 +7497,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i sel_cols_in_create += fullname + " `" + escapeBackTick(fullname.c_str()) + "`"; gwi.returnedCols.push_back(srcp); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(fieldVec[i]->field_name), srcp)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(fieldVec[i]->field_name.str), srcp)); TABLE_LIST* tmp = (fieldVec[i]->cached_table ? 
fieldVec[i]->cached_table : 0); gwi.tableMap[make_aliastable(sc->schemaName(), sc->tableName(), sc->tableAlias(), sc->isInfiniDB())] = make_pair(1, tmp); @@ -7570,7 +7576,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i return ER_INTERNAL_ERROR; } - if (gwi.returnedCols.empty() && gwi.additionalRetCols.empty()) + if (gwi.returnedCols.empty() && gwi.additionalRetCols.empty() && minSc) gwi.returnedCols.push_back(minSc); } @@ -7609,12 +7615,12 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i for (; table_ptr; table_ptr = table_ptr->next_global) { - if (string(table_ptr->table_name).find("$vtable") != string::npos) + if (string(table_ptr->table_name.str).find("$vtable") != string::npos) continue; if (table_ptr->derived) { - if (aliasSet.find(table_ptr->alias) != aliasSet.end()) + if (aliasSet.find(table_ptr->alias.str) != aliasSet.end()) continue; String str; @@ -7623,21 +7629,21 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (!firstTb) create_query += ", "; - create_query += "(" + string(str.c_ptr()) + ") " + string(table_ptr->alias); + create_query += "(" + string(str.c_ptr()) + ") " + string(table_ptr->alias.str); firstTb = false; - aliasSet.insert(table_ptr->alias); + aliasSet.insert(table_ptr->alias.str); } else if (table_ptr->view) { - if (aliasSet.find(table_ptr->alias) != aliasSet.end()) + if (aliasSet.find(table_ptr->alias.str) != aliasSet.end()) continue; if (!firstTb) create_query += ", "; - create_query += string(table_ptr->db) + "." + string(table_ptr->table_name) + - string(" `") + escapeBackTick(table_ptr->alias) + string("`"); - aliasSet.insert(table_ptr->alias); + create_query += string(table_ptr->db.str) + "." 
+ string(table_ptr->table_name.str) + + string(" `") + escapeBackTick(table_ptr->alias.str) + string("`"); + aliasSet.insert(table_ptr->alias.str); firstTb = false; } else @@ -7646,31 +7652,31 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i // consistent with item.cc field print. if (table_ptr->referencing_view) { - if (aliasSet.find(string(table_ptr->referencing_view->alias) + "_" + - string(table_ptr->alias)) != aliasSet.end()) + if (aliasSet.find(string(table_ptr->referencing_view->alias.str) + "_" + + string(table_ptr->alias.str)) != aliasSet.end()) continue; if (!firstTb) create_query += ", "; - create_query += string(table_ptr->db) + "." + string(table_ptr->table_name) + string(" "); + create_query += string(table_ptr->db.str) + "." + string(table_ptr->table_name.str) + string(" "); create_query += string(" `") + - escapeBackTick(table_ptr->referencing_view->alias) + "_" + - escapeBackTick(table_ptr->alias) + string("`"); - aliasSet.insert(string(table_ptr->referencing_view->alias) + "_" + - string(table_ptr->alias)); + escapeBackTick(table_ptr->referencing_view->alias.str) + "_" + + escapeBackTick(table_ptr->alias.str) + string("`"); + aliasSet.insert(string(table_ptr->referencing_view->alias.str) + "_" + + string(table_ptr->alias.str)); } else { - if (aliasSet.find(table_ptr->alias) != aliasSet.end()) + if (aliasSet.find(table_ptr->alias.str) != aliasSet.end()) continue; if (!firstTb) create_query += ", "; - create_query += string(table_ptr->db) + "." + string(table_ptr->table_name) + string(" "); - create_query += string("`") + escapeBackTick(table_ptr->alias) + string("`"); - aliasSet.insert(table_ptr->alias); + create_query += string(table_ptr->db.str) + "." 
+ string(table_ptr->table_name.str) + string(" "); + create_query += string("`") + escapeBackTick(table_ptr->alias.str) + string("`"); + aliasSet.insert(table_ptr->alias.str); } firstTb = false; @@ -7759,7 +7765,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i //continue; } // @bug 3518. if order by clause = selected column, use position. - else if (ord_item->name && ord_item->type() == Item::FIELD_ITEM) + else if (ord_item->name.length && ord_item->type() == Item::FIELD_ITEM) { Item_field* field = reinterpret_cast(ord_item); string fullname; @@ -7770,8 +7776,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (field->table_name) fullname += string(field->table_name) + "."; - if (field->field_name) - fullname += string(field->field_name); + if (field->field_name.length) + fullname += string(field->field_name.str); uint32_t i = 0; @@ -7784,7 +7790,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i continue; if (strcasecmp(fullname.c_str(), gwi.returnedCols[i]->alias().c_str()) == 0 || - strcasecmp(ord_item->name, gwi.returnedCols[i]->alias().c_str()) == 0) + strcasecmp(ord_item->name.str, gwi.returnedCols[i]->alias().c_str()) == 0) { ostringstream oss; oss << i + 1; @@ -7794,15 +7800,15 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } if (i == gwi.returnedCols.size()) - ord_cols += string(" `") + escapeBackTick(ord_item->name) + '`'; + ord_cols += string(" `") + escapeBackTick(ord_item->name.str) + '`'; } - else if (ord_item->name) + else if (ord_item->name.length) { // for union order by 1 case. 
For unknown reason, it doesn't show in_field_list if (ord_item->type() == Item::INT_ITEM) { - ord_cols += ord_item->name; + ord_cols += ord_item->name.str; } else if (ord_item->type() == Item::SUBSELECT_ITEM) { @@ -7812,7 +7818,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } else { - ord_cols += string(" `") + escapeBackTick(ord_item->name) + '`'; + ord_cols += string(" `") + escapeBackTick(ord_item->name.str) + '`'; } } else if (ord_item->type() == Item::FUNC_ITEM) @@ -8071,9 +8077,11 @@ int cp_get_plan(THD* thd, SCSEP& csep) else if (status < 0) return status; +#ifdef DEBUG_WALK_COND cerr << "---------------- cp_get_plan EXECUTION PLAN ----------------" << endl; cerr << *csep << endl ; cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; +#endif // Derived table projection and filter optimization. derivedTableOptimization(csep); @@ -8106,13 +8114,13 @@ int cp_get_table_plan(THD* thd, SCSEP& csep, cal_table_info& ti) { if (bitmap_is_set(read_set, field->field_index)) { - SimpleColumn* sc = new SimpleColumn(table->s->db.str, table->s->table_name.str, field->field_name, sessionID); + SimpleColumn* sc = new SimpleColumn(table->s->db.str, table->s->table_name.str, field->field_name.str, sessionID); string alias(table->alias.c_ptr()); sc->tableAlias(lower(alias)); assert (sc); boost::shared_ptr spsc(sc); gwi->returnedCols.push_back(spsc); - gwi->columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(field->field_name), spsc)); + gwi->columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(field->field_name.str), spsc)); } } @@ -8320,7 +8328,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro while ((sj_nest = sj_list_it++)) { - cerr << sj_nest->db << "." << sj_nest->table_name << endl; + cerr << sj_nest->db.str << "." 
<< sj_nest->table_name.str << endl; } #endif @@ -8356,7 +8364,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro SELECT_LEX* select_cursor = table_ptr->derived->first_select(); FromSubQuery fromSub(gwi, select_cursor); - string alias(table_ptr->alias); + string alias(table_ptr->alias.str); fromSub.alias(lower(alias)); CalpontSystemCatalog::TableAliasName tn = make_aliasview("", "", alias, viewName); @@ -8379,7 +8387,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro else if (table_ptr->view) { View* view = new View(table_ptr->view->select_lex, &gwi); - CalpontSystemCatalog::TableAliasName tn = make_aliastable(table_ptr->db, table_ptr->table_name, table_ptr->alias); + CalpontSystemCatalog::TableAliasName tn = make_aliastable(table_ptr->db.str, table_ptr->table_name.str, table_ptr->alias.str); view->viewName(tn); gwi.viewList.push_back(view); view->transform(); @@ -8391,17 +8399,17 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro // trigger system catalog cache if (infiniDB) - csc->columnRIDs(make_table(table_ptr->db, table_ptr->table_name), true); + csc->columnRIDs(make_table(table_ptr->db.str, table_ptr->table_name.str), true); - string table_name = table_ptr->table_name; + string table_name = table_ptr->table_name.str; // @bug5523 - if (table_ptr->db && strcmp(table_ptr->db, "information_schema") == 0) - table_name = (table_ptr->schema_table_name ? table_ptr->schema_table_name : table_ptr->alias); + if (table_ptr->db.length && strcmp(table_ptr->db.str, "information_schema") == 0) + table_name = (table_ptr->schema_table_name.length ? 
table_ptr->schema_table_name.str : table_ptr->alias.str); - CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db, table_name, table_ptr->alias, viewName, infiniDB); + CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db.str, table_name, table_ptr->alias.str, viewName, infiniDB); gwi.tbList.push_back(tn); - CalpontSystemCatalog::TableAliasName tan = make_aliastable(table_ptr->db, table_name, table_ptr->alias, infiniDB); + CalpontSystemCatalog::TableAliasName tan = make_aliastable(table_ptr->db.str, table_name, table_ptr->alias.str, infiniDB); gwi.tableMap[tan] = make_pair(0, table_ptr); #ifdef DEBUG_WALK_COND cerr << tn << endl; @@ -8590,8 +8598,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro // Given the size of gi.groupByAuxDescr is equal to gi.groupByFields fieldDescr = itDescr++; string itemAlias; - if(item->name) - itemAlias = (item->name); + if(item->name.length) + itemAlias = (item->name.str); else { itemAlias = (fieldDescr ? fieldDescr: ""); @@ -8615,7 +8623,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro SimpleColumn* sc = NULL; ConstantColumn* constCol = NULL; - if (ifp->field_name && string(ifp->field_name) == "*") + if (ifp->field_name.length && string(ifp->field_name.str) == "*") { collectAllCols(gwi, ifp); break; @@ -8653,21 +8661,21 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro if (ifp->is_autogenerated_name) gwi.selectCols.push_back("`" + escapeBackTick(fullname.c_str()) + "`" + " `" + - escapeBackTick(itemAlias.empty() ? ifp->name : itemAlias.c_str()) + "`"); + escapeBackTick(itemAlias.empty() ? ifp->name.str : itemAlias.c_str()) + "`"); else - gwi.selectCols.push_back("`" + escapeBackTick((itemAlias.empty() ? ifp->name : itemAlias.c_str())) + "`"); + gwi.selectCols.push_back("`" + escapeBackTick((itemAlias.empty() ? 
ifp->name.str : itemAlias.c_str())) + "`"); // MCOL-1052 Replace SimpleColumn with ConstantColumn, // since it must have a single value only. if (constCol) { gwi.returnedCols.push_back(spcc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), spcc)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), spcc)); } else { gwi.returnedCols.push_back(spsc); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), spsc)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), spsc)); } TABLE_LIST* tmp = 0; @@ -8709,7 +8717,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro // This item will be used in HAVING later. Item_func_or_sum* isfp = reinterpret_cast(item); - if ( ! isfp->name_length ) + if ( ! isfp->name.length ) { gwi.havingAggColsItems.push_back(item); } @@ -8751,7 +8759,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro bool hasNonSupportItem = false; parse_item(ifp, tmpVec, hasNonSupportItem, parseInfo); - if (ifp->has_subquery() || + if (ifp->with_subquery() || string(ifp->func_name()) == string("") || ifp->functype() == Item_func::NOT_ALL_FUNC || parseInfo & SUB_BIT) @@ -8775,8 +8783,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro srcp.reset(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (ifp->name) - srcp->alias(ifp->name); + if (ifp->name.length) + srcp->alias(ifp->name.str); continue; } @@ -8791,7 +8799,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro redo = true; String str; ifp->print(&str, QT_INFINIDB_NO_QUOTE); - gwi.selectCols.push_back(string(str.c_ptr()) + " " + "`" + escapeBackTick(item->name) + "`"); + gwi.selectCols.push_back(string(str.c_ptr()) + " " + "`" + 
escapeBackTick(item->name.str) + "`"); } break; @@ -8810,8 +8818,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro if (sel_cols_in_create.length() != 0) sel_cols_in_create += ", "; - sel_cols_in_create += string(str.c_ptr()) + " `" + ifp->name + "`"; - gwi.selectCols.push_back("`" + escapeBackTick(ifp->name) + "`"); + sel_cols_in_create += string(str.c_ptr()) + " `" + ifp->name.str + "`"; + gwi.selectCols.push_back("`" + escapeBackTick(ifp->name.str) + "`"); } } else // InfiniDB Non support functions still go through post process for now @@ -8861,8 +8869,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro SRCP srcp(cc); - if (ifp->name) - cc->alias(ifp->name); + if (ifp->name.length) + cc->alias(ifp->name.str); gwi.returnedCols.push_back(srcp); @@ -8912,7 +8920,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro // @bug 1706 String funcStr; ifp->print(&funcStr, QT_INFINIDB); - gwi.selectCols.push_back(string(funcStr.c_ptr()) + " `" + escapeBackTick(ifp->name) + "`"); + gwi.selectCols.push_back(string(funcStr.c_ptr()) + " `" + escapeBackTick(ifp->name.str) + "`"); // clear the error set by buildFunctionColumn gwi.fatalParseError = false; gwi.parseErrorText = ""; @@ -8929,14 +8937,14 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro else { // do not push the dummy column (mysql added) to returnedCol - if (item->name && string(item->name) == "Not_used") + if (item->name.length && string(item->name.str) == "Not_used") continue; // @bug3509. Constant column is sent to ExeMgr now. 
SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); gwi.returnedCols.push_back(srcp); @@ -8963,8 +8971,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); Item_string* isp = reinterpret_cast(item); String val, *str = isp->val_str(&val); @@ -8991,8 +8999,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); gwi.returnedCols.push_back(srcp); - if (item->name) - srcp->alias(item->name); + if (item->name.length) + srcp->alias(item->name.str); Item_decimal* isp = reinterpret_cast(item); String val, *str = isp->val_str(&val); @@ -9081,18 +9089,18 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro if (sub->get_select_lex()->get_table_list()) rc->viewName(lower(getViewName(sub->get_select_lex()->get_table_list()))); - if (sub->name) - rc->alias(sub->name); + if (sub->name.length) + rc->alias(sub->name.str); gwi.returnedCols.push_back(SRCP(rc)); String str; sub->get_select_lex()->print(gwi.thd, &str, QT_INFINIDB_NO_QUOTE); sel_cols_in_create += "(" + string(str.c_ptr()) + ")"; - if (sub->name) + if (sub->name.length) { - sel_cols_in_create += "`" + escapeBackTick(sub->name) + "`"; - gwi.selectCols.push_back(sub->name); + sel_cols_in_create += "`" + escapeBackTick(sub->name.str) + "`"; + gwi.selectCols.push_back(sub->name.str); } else { @@ -9241,8 +9249,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro { emsg = "un-recognized column"; - if (funcFieldVec[i]->name) - emsg += string(funcFieldVec[i]->name); + if (funcFieldVec[i]->name.length) + emsg += 
string(funcFieldVec[i]->name.str); } else { @@ -9275,7 +9283,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro if (j == gwi.returnedCols.size()) { gwi.returnedCols.push_back(srcp); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(funcFieldVec[i]->field_name), srcp)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(funcFieldVec[i]->field_name.str), srcp)); if (sel_cols_in_create.length() != 0) sel_cols_in_create += ", "; @@ -9371,7 +9379,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro for (; i < gwi.returnedCols.size(); i++) { - if (string(groupItem->name) == gwi.returnedCols[i]->alias()) + if (string(groupItem->name.str) == gwi.returnedCols[i]->alias()) { ReturnedColumn* rc = gwi.returnedCols[i]->clone(); rc->orderPos(i); @@ -9435,7 +9443,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } else { - if (ifp->name && string(ifp->name) == gwi.returnedCols[j].get()->alias()) + if (ifp->name.length && string(ifp->name.str) == gwi.returnedCols[j].get()->alias()) { rc = gwi.returnedCols[j].get()->clone(); rc->orderPos(j); @@ -9462,7 +9470,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } gwi.groupByCols.push_back(srcp); - gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name), srcp)); + gwi.columnMap.insert(CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), srcp)); } // @bug5638. 
The group by column is constant but not counter, alias has to match a column // on the select list @@ -9476,7 +9484,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro for (uint32_t j = 0; j < gwi.returnedCols.size(); j++) { - if (groupItem->name && string(groupItem->name) == gwi.returnedCols[j].get()->alias()) + if (groupItem->name.length && string(groupItem->name.str) == gwi.returnedCols[j].get()->alias()) { rc = gwi.returnedCols[j].get()->clone(); rc->orderPos(j); @@ -9494,7 +9502,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } else if ((*(groupcol->item))->type() == Item::SUBSELECT_ITEM) { - if (!groupcol->in_field_list || !groupItem->name) + if (!groupcol->in_field_list || !groupItem->name.length) { nonSupportItem = groupItem; } @@ -9504,7 +9512,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro for (; i < gwi.returnedCols.size(); i++) { - if (string(groupItem->name) == gwi.returnedCols[i]->alias()) + if (string(groupItem->name.str) == gwi.returnedCols[i]->alias()) { ReturnedColumn* rc = gwi.returnedCols[i]->clone(); rc->orderPos(i); @@ -9561,8 +9569,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro { Message::Args args; - if (nonSupportItem->name) - args.add("'" + string(nonSupportItem->name) + "'"); + if (nonSupportItem->name.length) + args.add("'" + string(nonSupportItem->name.str) + "'"); else args.add(""); @@ -9580,8 +9588,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro string select_query(gwi.thd->infinidb_vtable.select_vtable_query.c_ptr()); string lower_create_query(gwi.thd->infinidb_vtable.create_vtable_query.c_ptr()); string lower_select_query(gwi.thd->infinidb_vtable.select_vtable_query.c_ptr()); - algorithm::to_lower(lower_create_query); - algorithm::to_lower(lower_select_query); + boost::algorithm::to_lower(lower_create_query); + 
boost::algorithm::to_lower(lower_select_query); // check if window functions are in order by. InfiniDB process order by list if @@ -9631,7 +9639,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro // The item must be an alias for a projected column // and extended SELECT list must contain a proper rc // either aggregation or a field. - if (!rc && ifp->name_length) + if (!rc && ifp->name.length) { gwi.fatalParseError = false; execplan::CalpontSelectExecutionPlan::ReturnedColumnList::iterator iter = gwi.returnedCols.begin(); @@ -9639,7 +9647,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro for ( ; iter != gwi.returnedCols.end(); iter++ ) { - if ( (*iter).get()->alias() == ord_item->name ) + if ( (*iter).get()->alias() == ord_item->name.str ) { rc = (*iter).get()->clone(); nonAggField = rc->hasAggregate() ? false : true; @@ -9688,8 +9696,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro else ostream << "unknown table" << '.'; - if (iip->field_name) - ostream << iip->field_name; + if (iip->field_name.length) + ostream << iip->field_name.str; else ostream << "unknown field"; @@ -9856,12 +9864,12 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro for (; table_ptr; table_ptr = table_ptr->next_local) { - if (string(table_ptr->table_name).find("$vtable") != string::npos) + if (string(table_ptr->table_name.str).find("$vtable") != string::npos) continue; if (table_ptr->derived) { - if (aliasSet.find(table_ptr->alias) != aliasSet.end()) + if (aliasSet.find(table_ptr->alias.str) != aliasSet.end()) continue; String str; @@ -9870,21 +9878,21 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro if (!firstTb) create_query += ", "; - create_query += "(" + string(str.c_ptr()) + ") " + string(table_ptr->alias); + create_query += "(" + string(str.c_ptr()) + ") " + string(table_ptr->alias.str); firstTb = false; - 
aliasSet.insert(table_ptr->alias); + aliasSet.insert(table_ptr->alias.str); } else if (table_ptr->view) { - if (aliasSet.find(table_ptr->alias) != aliasSet.end()) + if (aliasSet.find(table_ptr->alias.str) != aliasSet.end()) continue; if (!firstTb) create_query += ", "; - create_query += string(table_ptr->db) + "." + string(table_ptr->table_name) + - string(" `") + escapeBackTick(table_ptr->alias) + string("`"); - aliasSet.insert(table_ptr->alias); + create_query += string(table_ptr->db.str) + "." + string(table_ptr->table_name.str) + + string(" `") + escapeBackTick(table_ptr->alias.str) + string("`"); + aliasSet.insert(table_ptr->alias.str); firstTb = false; } else @@ -9893,31 +9901,31 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro // consistent with item.cc field print. if (table_ptr->referencing_view) { - if (aliasSet.find(string(table_ptr->referencing_view->alias) + "_" + - string(table_ptr->alias)) != aliasSet.end()) + if (aliasSet.find(string(table_ptr->referencing_view->alias.str) + "_" + + string(table_ptr->alias.str)) != aliasSet.end()) continue; if (!firstTb) create_query += ", "; - create_query += string(table_ptr->db) + "." + string(table_ptr->table_name) + string(" "); + create_query += string(table_ptr->db.str) + "." 
+ string(table_ptr->table_name.str) + string(" "); create_query += string(" `") + - escapeBackTick(table_ptr->referencing_view->alias) + "_" + - escapeBackTick(table_ptr->alias) + string("`"); - aliasSet.insert(string(table_ptr->referencing_view->alias) + "_" + - string(table_ptr->alias)); + escapeBackTick(table_ptr->referencing_view->alias.str) + "_" + + escapeBackTick(table_ptr->alias.str) + string("`"); + aliasSet.insert(string(table_ptr->referencing_view->alias.str) + "_" + + string(table_ptr->alias.str)); } else { - if (aliasSet.find(table_ptr->alias) != aliasSet.end()) + if (aliasSet.find(table_ptr->alias.str) != aliasSet.end()) continue; if (!firstTb) create_query += ", "; - create_query += string(table_ptr->db) + "." + string(table_ptr->table_name) + string(" "); - create_query += string("`") + escapeBackTick(table_ptr->alias) + string("`"); - aliasSet.insert(table_ptr->alias); + create_query += string(table_ptr->db.str) + "." + string(table_ptr->table_name.str) + string(" "); + create_query += string("`") + escapeBackTick(table_ptr->alias.str) + string("`"); + aliasSet.insert(table_ptr->alias.str); } firstTb = false; @@ -10011,7 +10019,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro //continue; } // @bug 3518. if order by clause = selected column, use position. 
- else if (ord_item->name && ord_item->type() == Item::FIELD_ITEM) + else if (ord_item->name.length && ord_item->type() == Item::FIELD_ITEM) { Item_field* field = reinterpret_cast(ord_item); string fullname; @@ -10022,8 +10030,8 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro if (field->table_name) fullname += string(field->table_name) + "."; - if (field->field_name) - fullname += string(field->field_name); + if (field->field_name.length) + fullname += string(field->field_name.str); uint32_t i = 0; @@ -10036,7 +10044,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro continue; if (strcasecmp(fullname.c_str(), gwi.returnedCols[i]->alias().c_str()) == 0 || - strcasecmp(ord_item->name, gwi.returnedCols[i]->alias().c_str()) == 0) + strcasecmp(ord_item->name.str, gwi.returnedCols[i]->alias().c_str()) == 0) { ostringstream oss; oss << i + 1; @@ -10046,15 +10054,15 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } if (i == gwi.returnedCols.size()) - ord_cols += string(" `") + escapeBackTick(ord_item->name) + '`'; + ord_cols += string(" `") + escapeBackTick(ord_item->name.str) + '`'; } - else if (ord_item->name) + else if (ord_item->name.length) { // for union order by 1 case. 
For unknown reason, it doesn't show in_field_list if (ord_item->type() == Item::INT_ITEM) { - ord_cols += ord_item->name; + ord_cols += ord_item->name.str; } else if (ord_item->type() == Item::SUBSELECT_ITEM) { @@ -10064,7 +10072,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } else { - ord_cols += string(" `") + escapeBackTick(ord_item->name) + '`'; + ord_cols += string(" `") + escapeBackTick(ord_item->name.str) + '`'; } } else if (ord_item->type() == Item::FUNC_ITEM) diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 698974f56..62712f42a 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -61,7 +61,7 @@ using namespace std; #include #include #include -using namespace boost; +//using namespace boost; #include "idb_mysql.h" @@ -977,7 +977,7 @@ uint32_t doUpdateDelete(THD* thd) } //@Bug 4387. Check BRM status before start statement. - scoped_ptr dbrmp(new DBRM()); + boost::scoped_ptr dbrmp(new DBRM()); int rc = dbrmp->isReadWrite(); thd->infinidb_vtable.isInfiniDBDML = true; @@ -1133,7 +1133,7 @@ uint32_t doUpdateDelete(THD* thd) schemaName = string(item->db_name); columnAssignmentPtr = new ColumnAssignment(); - columnAssignmentPtr->fColumn = string(item->name); + columnAssignmentPtr->fColumn = string(item->name.str); columnAssignmentPtr->fOperator = "="; columnAssignmentPtr->fFuncScale = 0; Item* value = value_it++; @@ -1279,7 +1279,7 @@ uint32_t doUpdateDelete(THD* thd) { Item_field* tmp = (Item_field*)value; - if (!tmp->field_name) //null + if (!tmp->field_name.length) //null { columnAssignmentPtr->fScalarExpression = "NULL"; columnAssignmentPtr->fFromCol = false; @@ -1400,9 +1400,9 @@ uint32_t doUpdateDelete(THD* thd) if (deleteTable->get_num_of_tables() == 1) { - schemaName = first_table->db; - tableName = first_table->table_name; - aliasName = first_table->alias; + schemaName = first_table->db.str; + tableName = first_table->table_name.str; + aliasName = 
first_table->alias.str; qualifiedTablName->fName = tableName; qualifiedTablName->fSchema = schemaName; pDMLPackage = CalpontDMLFactory::makeCalpontDMLPackageFromMysqlBuffer(dmlStatement); @@ -1421,7 +1421,7 @@ uint32_t doUpdateDelete(THD* thd) first_table = (TABLE_LIST*) thd->lex->select_lex.table_list.first; schemaName = first_table->table->s->db.str; tableName = first_table->table->s->table_name.str; - aliasName = first_table->alias; + aliasName = first_table->alias.str; qualifiedTablName->fName = tableName; qualifiedTablName->fSchema = schemaName; pDMLPackage = CalpontDMLFactory::makeCalpontDMLPackageFromMysqlBuffer(dmlStatement); @@ -1432,7 +1432,7 @@ uint32_t doUpdateDelete(THD* thd) first_table = (TABLE_LIST*) thd->lex->select_lex.table_list.first; schemaName = first_table->table->s->db.str; tableName = first_table->table->s->table_name.str; - aliasName = first_table->alias; + aliasName = first_table->alias.str; qualifiedTablName->fName = tableName; qualifiedTablName->fSchema = schemaName; pDMLPackage = CalpontDMLFactory::makeCalpontDMLPackageFromMysqlBuffer(dmlStatement); @@ -2243,7 +2243,7 @@ extern "C" bool includeInput = true; string pstr(parameter); - algorithm::to_lower(pstr); + boost::algorithm::to_lower(pstr); if (pstr == PmSmallSideMaxMemory) { @@ -2389,8 +2389,8 @@ extern "C" { tableName.table = args->args[0]; - if (thd->db) - tableName.schema = thd->db; + if (thd->db.length) + tableName.schema = thd->db.str; else { string msg("No schema information provided"); @@ -2527,8 +2527,8 @@ extern "C" { tableName.table = args->args[0]; - if (thd->db) - tableName.schema = thd->db; + if (thd->db.length) + tableName.schema = thd->db.str; else { return -1; @@ -3022,8 +3022,8 @@ int ha_calpont_impl_rnd_init(TABLE* table) ti.csep->verID(verID); ti.csep->sessionID(sessionID); - if (thd->db) - ti.csep->schemaName(thd->db); + if (thd->db.length) + ti.csep->schemaName(thd->db.str); ti.csep->traceFlags(ci->traceFlags); ti.msTablePtr = table; @@ -3116,8 +3116,8 @@ int 
ha_calpont_impl_rnd_init(TABLE* table) csep->verID(verID); csep->sessionID(sessionID); - if (thd->db) - csep->schemaName(thd->db); + if (thd->db.length) + csep->schemaName(thd->db.str); csep->traceFlags(ci->traceFlags); @@ -3782,12 +3782,12 @@ int ha_calpont_impl_delete_table(const char* name) if (thd->lex->sql_command == SQLCOM_DROP_DB) { - dbName = thd->lex->name.str; + dbName = const_cast(thd->lex->name.str); } else { TABLE_LIST* first_table = (TABLE_LIST*) thd->lex->select_lex.table_list.first; - dbName = first_table->db; + dbName = const_cast(first_table->db.str); } if (!dbName) @@ -3809,7 +3809,7 @@ int ha_calpont_impl_delete_table(const char* name) if (strcmp(dbName, "calpontsys") == 0 && string(name).find("@0024vtable") == string::npos) { std::string stmt(idb_mysql_query_str(thd)); - algorithm::to_upper(stmt); + boost::algorithm::to_upper(stmt); //@Bug 2432. systables can be dropped with restrict if (stmt.find(" RESTRICT") != string::npos) @@ -3961,7 +3961,7 @@ void ha_calpont_impl_start_bulk_insert(ha_rows rows, TABLE* table) if ((thd->lex)->sql_command == SQLCOM_INSERT) { string insertStmt = idb_mysql_query_str(thd); - algorithm::to_lower(insertStmt); + boost::algorithm::to_lower(insertStmt); string intoStr("into"); size_t found = insertStmt.find(intoStr); @@ -4437,7 +4437,7 @@ void ha_calpont_impl_start_bulk_insert(ha_rows rows, TABLE* table) ci->stats.fQueryType = CalpontSelectExecutionPlan::queryTypeToString(CalpontSelectExecutionPlan::LOAD_DATA_INFILE); //@Bug 4387. Check BRM status before start statement. 
- scoped_ptr dbrmp(new DBRM()); + boost::scoped_ptr dbrmp(new DBRM()); int rc = dbrmp->isReadWrite(); if (rc != 0 ) @@ -4755,7 +4755,7 @@ int ha_calpont_impl_commit (handlerton* hton, THD* thd, bool all) return 0; //@Bug 5823 check if any active transaction for this session - scoped_ptr dbrmp(new DBRM()); + boost::scoped_ptr dbrmp(new DBRM()); BRM::TxnID txnId = dbrmp->getTxnID(tid2sid(thd->thread_id)); if (!txnId.valid) @@ -5257,8 +5257,8 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE csep->verID(verID); csep->sessionID(sessionID); - if (group_hand->table_list->db_length) - csep->schemaName(group_hand->table_list->db); + if (group_hand->table_list->db.length) + csep->schemaName(group_hand->table_list->db.str); csep->traceFlags(ci->traceFlags); diff --git a/dbcon/mysql/ha_calpont_partition.cpp b/dbcon/mysql/ha_calpont_partition.cpp index 99940262b..038c668c9 100644 --- a/dbcon/mysql/ha_calpont_partition.cpp +++ b/dbcon/mysql/ha_calpont_partition.cpp @@ -642,9 +642,9 @@ void partitionByValue_common(UDF_ARGS* args, // input } else { - if (current_thd->db) + if (current_thd->db.length) { - schema = current_thd->db; + schema = current_thd->db.str; } else { @@ -1019,9 +1019,9 @@ extern "C" } else { - if (current_thd->db) + if (current_thd->db.length) { - schema = current_thd->db; + schema = current_thd->db.str; } else { @@ -1228,7 +1228,7 @@ extern "C" { tableName.table = args->args[0]; - if (!current_thd->db) + if (!current_thd->db.length) { errMsg = "No schema name indicated."; memcpy(result, errMsg.c_str(), errMsg.length()); @@ -1236,7 +1236,7 @@ extern "C" return result; } - tableName.schema = current_thd->db; + tableName.schema = current_thd->db.str; parsePartitionString(args, 1, partitionNums, errMsg, tableName); } @@ -1316,14 +1316,14 @@ extern "C" { tableName.table = args->args[0]; - if (!current_thd->db) + if (!current_thd->db.length) { current_thd->get_stmt_da()->set_overwrite_status(true); 
current_thd->raise_error_printf(ER_INTERNAL_ERROR, IDBErrorInfo::instance()->errorMsg(ERR_PARTITION_NO_SCHEMA).c_str()); return result; } - tableName.schema = current_thd->db; + tableName.schema = current_thd->db.str; parsePartitionString(args, 1, partitionNums, errMsg, tableName); } @@ -1403,14 +1403,14 @@ extern "C" { tableName.table = args->args[0]; - if (!current_thd->db) + if (!current_thd->db.length) { current_thd->get_stmt_da()->set_overwrite_status(true); current_thd->raise_error_printf(ER_INTERNAL_ERROR, IDBErrorInfo::instance()->errorMsg(ERR_PARTITION_NO_SCHEMA).c_str()); return result; } - tableName.schema = current_thd->db; + tableName.schema = current_thd->db.str; parsePartitionString(args, 1, partSet, errMsg, tableName); } @@ -1724,9 +1724,9 @@ extern "C" } else { - if (current_thd->db) + if (current_thd->db.length) { - schema = current_thd->db; + schema = current_thd->db.str; } else { diff --git a/dbcon/mysql/ha_pseudocolumn.cpp b/dbcon/mysql/ha_pseudocolumn.cpp index 3e9b072f4..284130033 100644 --- a/dbcon/mysql/ha_pseudocolumn.cpp +++ b/dbcon/mysql/ha_pseudocolumn.cpp @@ -582,7 +582,7 @@ execplan::ReturnedColumn* buildPseudoColumn(Item* item, PseudoColumn* pc = new PseudoColumn(*sc, pseudoType); // @bug5892. set alias for derived table column matching. - pc->alias(ifp->name ? ifp->name : ""); + pc->alias(ifp->name.length ? ifp->name.str : ""); return pc; } diff --git a/dbcon/mysql/ha_view.cpp b/dbcon/mysql/ha_view.cpp index 5014007cc..764c2c5c5 100644 --- a/dbcon/mysql/ha_view.cpp +++ b/dbcon/mysql/ha_view.cpp @@ -84,7 +84,7 @@ void View::transform() for (; table_ptr; table_ptr = table_ptr->next_local) { // mysql put vtable here for from sub. 
we ignore it - if (string(table_ptr->table_name).find("$vtable") != string::npos) + if (string(table_ptr->table_name.str).find("$vtable") != string::npos) continue; string viewName = getViewName(table_ptr); @@ -93,8 +93,8 @@ void View::transform() { SELECT_LEX* select_cursor = table_ptr->derived->first_select(); FromSubQuery* fromSub = new FromSubQuery(gwi, select_cursor); - string alias(table_ptr->alias); - gwi.viewName = make_aliasview("", alias, table_ptr->belong_to_view->alias, ""); + string alias(table_ptr->alias.str); + gwi.viewName = make_aliasview("", alias, table_ptr->belong_to_view->alias.str, ""); algorithm::to_lower(alias); fromSub->alias(alias); gwi.derivedTbList.push_back(SCSEP(fromSub->transform())); @@ -107,8 +107,8 @@ void View::transform() else if (table_ptr->view) { // for nested view, the view name is vout.vin... format - CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db, table_ptr->table_name, table_ptr->alias, viewName); - gwi.viewName = make_aliastable(table_ptr->db, table_ptr->table_name, viewName); + CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db.str, table_ptr->table_name.str, table_ptr->alias.str, viewName); + gwi.viewName = make_aliastable(table_ptr->db.str, table_ptr->table_name.str, viewName); View* view = new View(table_ptr->view->select_lex, &gwi); view->viewName(gwi.viewName); gwi.viewList.push_back(view); @@ -121,9 +121,9 @@ void View::transform() // trigger system catalog cache if (infiniDB) - csc->columnRIDs(make_table(table_ptr->db, table_ptr->table_name), true); + csc->columnRIDs(make_table(table_ptr->db.str, table_ptr->table_name.str), true); - CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db, table_ptr->table_name, table_ptr->alias, viewName, infiniDB); + CalpontSystemCatalog::TableAliasName tn = make_aliasview(table_ptr->db.str, table_ptr->table_name.str, table_ptr->alias.str, viewName, infiniDB); gwi.tbList.push_back(tn); gwi.tableMap[tn] = make_pair(0, 
table_ptr); fParentGwip->tableMap[tn] = make_pair(0, table_ptr); diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 4c04a402c..ac7a2af3a 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -203,7 +203,7 @@ string ConvertFuncName(Item_sum* item) switch (item->sum_func()) { case Item_sum::COUNT_FUNC: - if (!item->arguments()[0]->name) + if (!item->arguments()[0]->name.str) return "COUNT(*)"; return "COUNT"; diff --git a/dbcon/mysql/idb_mysql.h b/dbcon/mysql/idb_mysql.h index dba9ae3c5..cce7bf9f4 100644 --- a/dbcon/mysql/idb_mysql.h +++ b/dbcon/mysql/idb_mysql.h @@ -63,6 +63,7 @@ template bool isnan(T); #endif #endif +#include "sql_plugin.h" #include "sql_table.h" #include "sql_select.h" #include "mysqld_error.h" diff --git a/dbcon/mysql/is_columnstore_columns.cpp b/dbcon/mysql/is_columnstore_columns.cpp index 84d81042c..278a606d4 100644 --- a/dbcon/mysql/is_columnstore_columns.cpp +++ b/dbcon/mysql/is_columnstore_columns.cpp @@ -61,13 +61,13 @@ static void get_cond_item(Item_func* item, String** table, String** db) char tmp_char[MAX_FIELD_WIDTH]; Item_field* item_field = (Item_field*) item->arguments()[0]->real_item(); - if (strcasecmp(item_field->field_name, "table_name") == 0) + if (strcasecmp(item_field->field_name.str, "table_name") == 0) { String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); *table = item->arguments()[1]->val_str(&str_buf); return; } - else if (strcasecmp(item_field->field_name, "table_schema") == 0) + else if (strcasecmp(item_field->field_name.str, "table_schema") == 0) { String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); *db = item->arguments()[1]->val_str(&str_buf); diff --git a/dbcon/mysql/is_columnstore_extents.cpp b/dbcon/mysql/is_columnstore_extents.cpp index 0fd42d3cf..bdde53316 100644 --- a/dbcon/mysql/is_columnstore_extents.cpp +++ b/dbcon/mysql/is_columnstore_extents.cpp @@ -207,7 +207,7 @@ static int 
is_columnstore_extents_fill(THD* thd, TABLE_LIST* tables, COND* cond) // WHERE object_id = value Item_field* item_field = (Item_field*) fitem->arguments()[0]->real_item(); - if (strcasecmp(item_field->field_name, "object_id") == 0) + if (strcasecmp(item_field->field_name.str, "object_id") == 0) { cond_oid = fitem->arguments()[1]->val_int(); return generate_result(cond_oid, emp, table, thd); @@ -219,7 +219,7 @@ static int is_columnstore_extents_fill(THD* thd, TABLE_LIST* tables, COND* cond) // WHERE value = object_id Item_field* item_field = (Item_field*) fitem->arguments()[1]->real_item(); - if (strcasecmp(item_field->field_name, "object_id") == 0) + if (strcasecmp(item_field->field_name.str, "object_id") == 0) { cond_oid = fitem->arguments()[0]->val_int(); return generate_result(cond_oid, emp, table, thd); @@ -231,7 +231,7 @@ static int is_columnstore_extents_fill(THD* thd, TABLE_LIST* tables, COND* cond) // WHERE object_id in (value1, value2) Item_field* item_field = (Item_field*) fitem->arguments()[0]->real_item(); - if (strcasecmp(item_field->field_name, "object_id") == 0) + if (strcasecmp(item_field->field_name.str, "object_id") == 0) { for (unsigned int i = 1; i < fitem->argument_count(); i++) { diff --git a/dbcon/mysql/is_columnstore_files.cpp b/dbcon/mysql/is_columnstore_files.cpp index 71d61958f..be0411058 100644 --- a/dbcon/mysql/is_columnstore_files.cpp +++ b/dbcon/mysql/is_columnstore_files.cpp @@ -212,7 +212,7 @@ static int is_columnstore_files_fill(THD* thd, TABLE_LIST* tables, COND* cond) // WHERE object_id = value Item_field* item_field = (Item_field*) fitem->arguments()[0]->real_item(); - if (strcasecmp(item_field->field_name, "object_id") == 0) + if (strcasecmp(item_field->field_name.str, "object_id") == 0) { cond_oid = fitem->arguments()[1]->val_int(); return generate_result(cond_oid, emp, table, thd); @@ -224,7 +224,7 @@ static int is_columnstore_files_fill(THD* thd, TABLE_LIST* tables, COND* cond) // WHERE value = object_id Item_field* 
item_field = (Item_field*) fitem->arguments()[1]->real_item(); - if (strcasecmp(item_field->field_name, "object_id") == 0) + if (strcasecmp(item_field->field_name.str, "object_id") == 0) { cond_oid = fitem->arguments()[0]->val_int(); return generate_result(cond_oid, emp, table, thd); @@ -236,7 +236,7 @@ static int is_columnstore_files_fill(THD* thd, TABLE_LIST* tables, COND* cond) // WHERE object_id in (value1, value2) Item_field* item_field = (Item_field*) fitem->arguments()[0]->real_item(); - if (strcasecmp(item_field->field_name, "object_id") == 0) + if (strcasecmp(item_field->field_name.str, "object_id") == 0) { for (unsigned int i = 1; i < fitem->argument_count(); i++) { diff --git a/dbcon/mysql/is_columnstore_tables.cpp b/dbcon/mysql/is_columnstore_tables.cpp index d422c2f90..02a5dd72e 100644 --- a/dbcon/mysql/is_columnstore_tables.cpp +++ b/dbcon/mysql/is_columnstore_tables.cpp @@ -47,13 +47,13 @@ static void get_cond_item(Item_func* item, String** table, String** db) char tmp_char[MAX_FIELD_WIDTH]; Item_field* item_field = (Item_field*) item->arguments()[0]->real_item(); - if (strcasecmp(item_field->field_name, "table_name") == 0) + if (strcasecmp(item_field->field_name.str, "table_name") == 0) { String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); *table = item->arguments()[1]->val_str(&str_buf); return; } - else if (strcasecmp(item_field->field_name, "table_schema") == 0) + else if (strcasecmp(item_field->field_name.str, "table_schema") == 0) { String str_buf(tmp_char, sizeof(tmp_char), system_charset_info); *db = item->arguments()[1]->val_str(&str_buf); diff --git a/dbcon/mysql/sm.cpp b/dbcon/mysql/sm.cpp index 569fee6a5..565a65ad0 100644 --- a/dbcon/mysql/sm.cpp +++ b/dbcon/mysql/sm.cpp @@ -20,6 +20,7 @@ * ***********************************************************************/ +#include #include #include #include diff --git a/oam/install_scripts/post-install b/oam/install_scripts/post-install index f7aeeb2ca..1766a37c2 100755 --- 
a/oam/install_scripts/post-install +++ b/oam/install_scripts/post-install @@ -247,7 +247,6 @@ else $SUDO chmod 777 /tmp $installdir/bin/syslogSetup.sh --installdir=$installdir install > /tmp/syslog_install.log 2>&1 $SUDO chown $user:$user $installdir/etc/Columnstore.xml - $SUDO chmod -R 777 /dev/shm $SUDO mkdir /var/lock/subsys > /dev/null 2>&1 $SUDO chmod 777 /var/lock/subsys > /dev/null 2>&1 $SUDO rm -f /var/lock/subsys/mysql-Columnstore diff --git a/procmon/main.cpp b/procmon/main.cpp index 35a2ebb59..81b0d1864 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -134,13 +134,6 @@ int main(int argc, char** argv) if (p && *p) USER = p; - // change permissions on /dev/shm - if ( !rootUser) - { - string cmd = "sudo chmod 777 /dev/shm >/dev/null 2>&1"; - system(cmd.c_str()); - } - // get and set locale language string systemLang = "C"; diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 5ff74b034..ce9aa0449 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -1063,14 +1063,6 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO int requestStatus = oam::API_SUCCESS; log.writeLog(__LINE__, "MSG RECEIVED: Start All process request..."); - // change permissions on /dev/shm - string cmd = "chmod 755 /dev/shm >/dev/null 2>&1"; - - if ( !rootUser) - cmd = "sudo chmod 777 /dev/shm >/dev/null 2>&1"; - - system(cmd.c_str()); - //start the mysqld daemon try { diff --git a/utils/common/simpleallocator.h b/utils/common/simpleallocator.h index 71474ca24..9419a8f69 100644 --- a/utils/common/simpleallocator.h +++ b/utils/common/simpleallocator.h @@ -156,10 +156,6 @@ public: ptr->T::~T(); } - SimplePool* getPool() - { - return fPool; - } void setPool(SimplePool* pool) { fPool = pool; diff --git a/utils/funcexp/funcexp.cpp b/utils/funcexp/funcexp.cpp index 3c530f381..66782cc54 100644 --- a/utils/funcexp/funcexp.cpp +++ b/utils/funcexp/funcexp.cpp @@ -139,6 +139,7 @@ FuncExp::FuncExp() fFuncMap["least"] = 
new Func_least(); //dlh fFuncMap["left"] = new Func_left(); //dlh fFuncMap["length"] = new Func_length(); + fFuncMap["octet_length"] = new Func_length(); // MariaDB 10.3 fFuncMap["ln"] = new Func_log(); fFuncMap["locate"] = new Func_instr(); fFuncMap["log"] = new Func_log(); @@ -152,6 +153,7 @@ FuncExp::FuncExp() fFuncMap["microsecond"] = new Func_microsecond(); fFuncMap["minute"] = new Func_minute(); //dlh fFuncMap["mod"] = new Func_mod(); //dlh + fFuncMap["MOD"] = new Func_mod(); // MariaDB 10.3 fFuncMap["%"] = new Func_mod(); //dlh fFuncMap["md5"] = new Func_md5(); fFuncMap["mid"] = new Func_substr(); diff --git a/utils/funcexp/funcexpwrapper.cpp b/utils/funcexp/funcexpwrapper.cpp index 9c67fd37e..8bf1adf4f 100644 --- a/utils/funcexp/funcexpwrapper.cpp +++ b/utils/funcexp/funcexpwrapper.cpp @@ -33,7 +33,6 @@ #include "objectreader.h" using namespace messageqcpp; -using namespace boost; using namespace rowgroup; using namespace execplan; @@ -103,12 +102,12 @@ void FuncExpWrapper::deserialize(ByteStream& bs) bs >> rcsCount; for (i = 0; i < fCount; i++) - filters.push_back(shared_ptr(ObjectReader::createParseTree(bs))); + filters.push_back(boost::shared_ptr(ObjectReader::createParseTree(bs))); for (i = 0; i < rcsCount; i++) { ReturnedColumn* rc = (ReturnedColumn*) ObjectReader::createTreeNode(bs); - rcs.push_back(shared_ptr(rc)); + rcs.push_back(boost::shared_ptr(rc)); } } @@ -125,12 +124,12 @@ bool FuncExpWrapper::evaluate(Row* r) return true; } -void FuncExpWrapper::addFilter(const shared_ptr& f) +void FuncExpWrapper::addFilter(const boost::shared_ptr& f) { filters.push_back(f); } -void FuncExpWrapper::addReturnedColumn(const shared_ptr& rc) +void FuncExpWrapper::addReturnedColumn(const boost::shared_ptr& rc) { rcs.push_back(rc); } diff --git a/utils/funcexp/functor.h b/utils/funcexp/functor.h index a16917453..20914e99e 100644 --- a/utils/funcexp/functor.h +++ b/utils/funcexp/functor.h @@ -166,8 +166,8 @@ protected: virtual std::string doubleToString(double); 
virtual int64_t nowDatetime(); - virtual int64_t addTime(DateTime& dt1, Time& dt2); - virtual int64_t addTime(Time& dt1, Time& dt2); + virtual int64_t addTime(DateTime& dt1, dataconvert::Time& dt2); + virtual int64_t addTime(dataconvert::Time& dt1, dataconvert::Time& dt2); std::string fFuncName; diff --git a/utils/funcexp/functor_str.h b/utils/funcexp/functor_str.h index c71cdec91..b7051be4e 100644 --- a/utils/funcexp/functor_str.h +++ b/utils/funcexp/functor_str.h @@ -24,6 +24,7 @@ #include "functor.h" +using namespace std; namespace funcexp { @@ -126,7 +127,7 @@ protected: exponent = (int)floor(log10( fabs(floatVal))); base = floatVal * pow(10, -1.0 * exponent); - if (std::isnan(exponent) || std::isnan(base)) + if (isnan(exponent) || isnan(base)) { snprintf(buf, 20, "%f", floatVal); fFloatStr = execplan::removeTrailing0(buf, 20); diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index f1fc39dcc..a07cbcc87 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -58,6 +58,9 @@ #include "../winport/winport.h" +// Workaround for my_global.h #define of isnan(X) causing a std::std namespace +using namespace std; + namespace rowgroup { @@ -1019,7 +1022,7 @@ inline void Row::setFloatField(float val, uint32_t colIndex) //N.B. There is a bug in boost::any or in gcc where, if you store a nan, you will get back a nan, // but not necessarily the same bits that you put in. This only seems to be for float (double seems // to work). 
- if (std::isnan(val)) + if (isnan(val)) setUintField<4>(joblist::FLOATNULL, colIndex); else *((float*) &data[offsets[colIndex]]) = val; From 5d245c8932fc8698ca26ab70d4f7b55efaa9567e Mon Sep 17 00:00:00 2001 From: David Hill Date: Thu, 2 Aug 2018 10:59:38 -0500 Subject: [PATCH 101/123] MCOL-1498 - add prompt for password on non-distibute installs --- oamapps/postConfigure/postConfigure.cpp | 72 ++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 98227da9d..774a7a490 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -3090,7 +3090,9 @@ int main(int argc, char *argv[]) //check if dbrm data resides in older directory path and inform user if it does dbrmDirCheck(); - if ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM && pmNumber == 1) { + if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || + ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) + { //run the mysql / mysqld setup scripts cout << endl << "===== Running the MariaDB ColumnStore MariaDB Server setup scripts =====" << endl << endl; @@ -3098,7 +3100,61 @@ int main(int argc, char *argv[]) // call the mysql setup scripts mysqlSetup(); - sleep(5); + sleep(3); + } + + if ( IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM || + pmNumber > 1 ) { + + if ( password.empty() ) + { + cout << endl; + cout << "Next step is to enter the password to access the other Servers." << endl; + cout << "This is either your password or you can default to using a ssh key" << endl; + cout << "If using a password, the password needs to be the same on all Servers." 
<< endl << endl; + } + + while(true) + { + char *pass1, *pass2; + + if ( noPrompting ) { + cout << "Enter password, hit 'enter' to default to using a ssh key, or 'exit' > " << endl; + if ( password.empty() ) + password = "ssh"; + break; + } + + //check for command line option password + //if ( !password.empty() ) + // break; + + pass1=getpass("Enter password, hit 'enter' to default to using a ssh key, or 'exit' > "); + if ( strcmp(pass1, "") == 0 ) { + password = "ssh"; + break; + } + + string p1 = pass1; + if ( p1 == "exit") + exit(0); + + pass2=getpass("Confirm password > "); + string p2 = pass2; + if ( p1 == p2 ) { + password = p2; + break; + } + else + cout << "Password mismatch, please re-enter" << endl; + } + + //add single quote for special characters + if ( password != "ssh" ) + { + password = "'" + password + "'"; + } + } int thread_id = 0; @@ -3173,7 +3229,7 @@ int main(int argc, char *argv[]) if( !pkgCheck(columnstorePackage) ) exit(1); - if ( password.empty() ) +/* if ( password.empty() ) { cout << endl; cout << "Next step is to enter the password to access the other Servers." 
<< endl; @@ -3221,10 +3277,10 @@ int main(int argc, char *argv[]) { password = "'" + password + "'"; } - +*/ checkSystemMySQLPort(mysqlPort, sysConfig, USER, password, childmodulelist, IserverTypeInstall, pmwithum); - if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || +/* if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) { cout << endl << "===== Running the MariaDB ColumnStore MariaDB ColumnStore setup scripts =====" << endl << endl; @@ -3233,7 +3289,7 @@ int main(int argc, char *argv[]) mysqlSetup(); sleep(5); } - +*/ string AmazonInstall = "0"; if ( amazonInstall ) AmazonInstall = "1"; @@ -3411,7 +3467,7 @@ int main(int argc, char *argv[]) cout << " DONE" << endl; } } - else +/* else { if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) @@ -3423,7 +3479,7 @@ int main(int argc, char *argv[]) sleep(5); } } - +*/ //configure data redundancy if (DataRedundancy) { From 0837f9a520eb4315f5b5d5e20d727d45bc35d9e4 Mon Sep 17 00:00:00 2001 From: David Hill Date: Thu, 2 Aug 2018 11:51:27 -0500 Subject: [PATCH 102/123] MCOL-1498 - add prompt for password on non-distibute installs --- oamapps/postConfigure/postConfigure.cpp | 141 ++++++------------------ 1 file changed, 33 insertions(+), 108 deletions(-) diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 774a7a490..c31f0c5de 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -3104,57 +3104,53 @@ int main(int argc, char *argv[]) } if ( IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM || - pmNumber > 1 ) { - + pmNumber > 1 ) + { if ( password.empty() ) { cout << endl; cout << "Next step is to enter the password to access the other Servers." 
<< endl; cout << "This is either your password or you can default to using a ssh key" << endl; cout << "If using a password, the password needs to be the same on all Servers." << endl << endl; - } - - while(true) - { - char *pass1, *pass2; if ( noPrompting ) { cout << "Enter password, hit 'enter' to default to using a ssh key, or 'exit' > " << endl; - if ( password.empty() ) - password = "ssh"; - break; - } - - //check for command line option password - //if ( !password.empty() ) - // break; - - pass1=getpass("Enter password, hit 'enter' to default to using a ssh key, or 'exit' > "); - if ( strcmp(pass1, "") == 0 ) { password = "ssh"; - break; } + else + { + while(true) + { + char *pass1, *pass2; - string p1 = pass1; - if ( p1 == "exit") - exit(0); + pass1=getpass("Enter password, hit 'enter' to default to using a ssh key, or 'exit' > "); + if ( strcmp(pass1, "") == 0 ) { + password = "ssh"; + break; + } - pass2=getpass("Confirm password > "); - string p2 = pass2; - if ( p1 == p2 ) { - password = p2; - break; - } - else - cout << "Password mismatch, please re-enter" << endl; - } + string p1 = pass1; + if ( p1 == "exit") + exit(0); - //add single quote for special characters - if ( password != "ssh" ) - { - password = "'" + password + "'"; - } + pass2=getpass("Confirm password > "); + string p2 = pass2; + if ( p1 == p2 ) { + password = p2; + break; + } + else + cout << "Password mismatch, please re-enter" << endl; + } + //add single quote for special characters + if ( password != "ssh" ) + { + password = "'" + password + "'"; + } + + } + } } int thread_id = 0; @@ -3229,67 +3225,8 @@ int main(int argc, char *argv[]) if( !pkgCheck(columnstorePackage) ) exit(1); -/* if ( password.empty() ) - { - cout << endl; - cout << "Next step is to enter the password to access the other Servers." << endl; - cout << "This is either your password or you can default to using a ssh key" << endl; - cout << "If using a password, the password needs to be the same on all Servers." 
<< endl << endl; - } - - while(true) - { - char *pass1, *pass2; - - if ( noPrompting ) { - cout << "Enter password, hit 'enter' to default to using a ssh key, or 'exit' > " << endl; - if ( password.empty() ) - password = "ssh"; - break; - } - - //check for command line option password - if ( !password.empty() ) - break; - - pass1=getpass("Enter password, hit 'enter' to default to using a ssh key, or 'exit' > "); - if ( strcmp(pass1, "") == 0 ) { - password = "ssh"; - break; - } - - if ( pass1 == "exit") - exit(0); - - string p1 = pass1; - pass2=getpass("Confirm password > "); - string p2 = pass2; - if ( p1 == p2 ) { - password = p2; - break; - } - else - cout << "Password mismatch, please re-enter" << endl; - } - - //add single quote for special characters - if ( password != "ssh" ) - { - password = "'" + password + "'"; - } -*/ checkSystemMySQLPort(mysqlPort, sysConfig, USER, password, childmodulelist, IserverTypeInstall, pmwithum); -/* if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) - { - cout << endl << "===== Running the MariaDB ColumnStore MariaDB ColumnStore setup scripts =====" << endl << endl; - - // call the mysql setup scripts - mysqlSetup(); - sleep(5); - } -*/ string AmazonInstall = "0"; if ( amazonInstall ) AmazonInstall = "1"; @@ -3467,19 +3404,7 @@ int main(int argc, char *argv[]) cout << " DONE" << endl; } } -/* else - { - if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) - { - cout << endl << "===== Running the MariaDB ColumnStore MariaDB ColumnStore setup scripts =====" << endl << endl; - - // call the mysql setup scripts - mysqlSetup(); - sleep(5); - } - } -*/ + //configure data redundancy if (DataRedundancy) { From 3a159908f4ca9e4ae5e3e169121de70720f830c7 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 3 Aug 2018 11:55:49 +0100 Subject: [PATCH 103/123] 
MCOL-1385 Fix window functions MariaDB maps MEDIAN to PERCENTILE_CONT and also has PERCENTILE_DISC now. So remove our MEDIAN UDAF but keep the source as it is used in docs. --- dbcon/mysql/ha_window_function.cpp | 7 +++ utils/udfsdk/CMakeLists.txt | 2 +- utils/udfsdk/mcsv1_udaf.cpp | 2 - utils/udfsdk/udfmysql.cpp | 72 ------------------------------ 4 files changed, 8 insertions(+), 75 deletions(-) diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index ac7a2af3a..0c57ce8bc 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -289,6 +289,13 @@ string ConvertFuncName(Item_sum* item) return "PERCENT_RANK"; break; + case Item_sum::PERCENTILE_CONT_FUNC: + return "PERCENTILE_CONT"; + break; + + case Item_sum::PERCENTILE_DISC_FUNC: + return "PERCENTILE_DISC"; + case Item_sum::CUME_DIST_FUNC: return "CUME_DIST"; break; diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index 01009e35a..ad4460977 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index b042d63f5..9e4596440 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -34,7 +34,6 @@ using namespace mcsv1sdk; UDAF_MAP UDAFMap::fm; #include "allnull.h" #include "ssq.h" -#include "median.h" #include "avg_mode.h" #include "regr_avgx.h" #include "avgx.h" @@ -52,7 +51,6 @@ UDAF_MAP& UDAFMap::getMap() // the function names passed to the interface is always in lower case. 
fm["allnull"] = new allnull(); fm["ssq"] = new ssq(); - fm["median"] = new median(); fm["avg_mode"] = new avg_mode(); fm["regr_avgx"] = new regr_avgx(); fm["avgx"] = new avgx(); diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp index b0b2ebb9c..1c0fee1db 100644 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -349,78 +349,6 @@ extern "C" return data->sumsq; } -//======================================================================= - - /** - * MEDIAN connector stub - */ -#ifdef _MSC_VER - __declspec(dllexport) -#endif - my_bool median_init(UDF_INIT* initid, UDF_ARGS* args, char* message) - { - if (args->arg_count != 1) - { - strcpy(message, "median() requires one argument"); - return 1; - } - - /* - if (!(data = (struct ssq_data*) malloc(sizeof(struct ssq_data)))) - { - strmov(message,"Couldn't allocate memory"); - return 1; - } - data->sumsq = 0; - - initid->ptr = (char*)data; - */ - return 0; - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - void median_deinit(UDF_INIT* initid) - { -// free(initid->ptr); - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - void - median_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), - char* message __attribute__((unused))) - { -// struct ssq_data* data = (struct ssq_data*)initid->ptr; -// data->sumsq = 0; - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - void - median_add(UDF_INIT* initid, UDF_ARGS* args, - char* is_null, - char* message __attribute__((unused))) - { -// struct ssq_data* data = (struct ssq_data*)initid->ptr; -// double val = cvtArgToDouble(args->arg_type[0], args->args[0]); -// data->sumsq = val*val; - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - long long median(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), - char* is_null, char* error __attribute__((unused))) - { -// struct ssq_data* data = (struct ssq_data*)initid->ptr; -// return data->sumsq; - return 0; - } - /** * avg_mode connector stub */ From 
515cc31d4fe7924eba6d5141c52975c5c22e9245 Mon Sep 17 00:00:00 2001 From: Ben Thompson Date: Mon, 6 Aug 2018 10:10:52 -0500 Subject: [PATCH 104/123] MCOL-1610: modify so if moving a dbroot fails with gluster it is reassigned to original owner. Add logging around failure for mounting gluster volumes. --- oam/oamcpp/liboamcpp.cpp | 101 +++++++++++++++++++++---------------- procmgr/main.cpp | 20 ++++---- procmgr/processmanager.cpp | 4 +- procmon/processmonitor.cpp | 7 +++ 4 files changed, 76 insertions(+), 56 deletions(-) diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index 9a405e978..7483ca239 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -5295,6 +5295,7 @@ namespace oam dbrootList dbroot1; dbroot1.push_back(*pt1); + bool returnDbRoot = false; //send msg to unmount dbroot if module is not offline int opState; @@ -5306,7 +5307,6 @@ namespace oam {} if (opState != oam::AUTO_OFFLINE || opState != oam::AUTO_DISABLED) { -// bool unmountPass = true; try { mountDBRoot(dbroot1, false); @@ -5316,13 +5316,8 @@ namespace oam writeLog("ERROR: dbroot failed to unmount", LOG_TYPE_ERROR ); cout << endl << "ERROR: umountDBRoot api failure" << endl; exceptionControl("manualMovePmDbroot", API_FAILURE); -// unmountPass = false; } -// if ( !unmountPass) { -// dbrootlist.erase(pt1); -// break; -// } } //check for amazon moving required @@ -5340,38 +5335,79 @@ namespace oam //if Gluster, do the assign command if ( DataRedundancyConfig == "y") { - try { + try + { string errmsg; int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, toPM, errmsg); - if ( ret != 0 ) + if ( ret == 0 ) + { + todbrootConfigList.push_back(*pt2); + residedbrootConfigList.erase(pt2); + } + else { cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg, LOG_TYPE_ERROR 
); + returnDbRoot = true; } } catch (exception& e) { cout << endl << "**** glusterctl API exception: " << e.what() << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR ); + returnDbRoot = true; } catch (...) { cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR ); + returnDbRoot = true; } } - todbrootConfigList.push_back(*pt2); - - residedbrootConfigList.erase(pt2); - + if (returnDbRoot) + { + // something went wrong return it back to original owner + try + { + string errmsg; + writeLog("reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); + int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, residePM, errmsg); + if ( ret != 0 ) + { + cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg << endl; + writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg, LOG_TYPE_ERROR ); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); + } + mountDBRoot(dbroot1); + //get updated Columnstore.xml distributed + distributeConfigFile("system"); + return; + } + catch (exception& e) + { + cout << endl << "**** glusterctl API exception: " << e.what() << endl; + cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl; + writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); + } + catch (...) 
+ { + cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; + cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl; + writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); + } + } break; } } } + + //set the 2 pms dbroot config try { @@ -5381,7 +5417,7 @@ namespace oam { writeLog("ERROR: setPmDbrootConfig api failure for pm" + residePMID , LOG_TYPE_ERROR ); cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + residePMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); } try @@ -5392,7 +5428,7 @@ namespace oam { writeLog("ERROR: setPmDbrootConfig api failure for pm" + toPMID , LOG_TYPE_ERROR ); cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); } //send msg to mount dbroot @@ -5980,7 +6016,7 @@ namespace oam } if (!found) { - writeLog("No dbroots found in ../Calpont/local/moveDbrootTransactionLog", LOG_TYPE_DEBUG ); + writeLog("No dbroots found in " + InstallDir + "/moveDbrootTransactionLog", LOG_TYPE_DEBUG ); cout << "No dbroots found in " << fileName << endl; } @@ -6518,32 +6554,7 @@ namespace oam for( ; pt3 != dbrootlist.end() ; pt3++) { todbrootConfigList.push_back(*pt3); - -/* if ( DataRedundancyConfig == "y") - { - try { - string errmsg; - int ret = glusterctl(oam::GLUSTER_ASSIGN, itoa(*pt3), toPM, errmsg); - if ( ret != 0 ) - { - cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID + ", error: " + errmsg << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - } - catch (exception& e) - { - cout << endl << "**** glusterctl API exception: " << e.what() << endl; - cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) 
+ " to pm" + toPMID << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - catch (...) - { - cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; - cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - } -*/ } + } try { @@ -6961,12 +6972,14 @@ namespace oam { cout << endl << "**** glusterctl API exception: " << e.what() << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl; + writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR ); exceptionControl("removeDbroot", API_FAILURE); } catch (...) { cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl; + writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR ); exceptionControl("removeDbroot", API_FAILURE); } } diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 2747fda16..995c851c2 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1574,7 +1574,7 @@ void pingDeviceThread() { // no dbroots, fail module log.writeLog(__LINE__, "autoUnMovePmDbroot left no dbroots mounted, failing module restart: " + moduleName, LOG_TYPE_WARNING); - + //Issue an alarm aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); @@ -1597,7 +1597,7 @@ void pingDeviceThread() //set query system state ready processManager.setQuerySystemState(true); - break; + goto break_case; } } catch(...) @@ -1619,25 +1619,24 @@ void pingDeviceThread() if ( retry == 5 ) { log.writeLog(__LINE__, "autoUnMovePmDbroot: Failed. 
Fail Module", LOG_TYPE_WARNING); - + log.writeLog(__LINE__, "System DBRM READ ONLY - Verify dbroot mounts.", LOG_TYPE_WARNING); //Issue an alarm aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); //set module to disable state processManager.disableModule(moduleName, true); + // Need to do something here to verify data mounts before resuming + // Best to assume if we reach this you need to put into readonly and verify all dbroots are mounted + //call dbrm control - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); + oam.dbrmctl("readonly"); + log.writeLog(__LINE__, "'dbrmctl readonly' done", LOG_TYPE_DEBUG); //clear count moduleInfoList[moduleName] = 0; - processManager.setSystemState(oam::ACTIVE); + processManager.setSystemState(oam::DEGRADED); //set query system state ready processManager.setQuerySystemState(true); @@ -2358,6 +2357,7 @@ void pingDeviceThread() } } //end of for loop } + break_case: // check and take action if LAN outage is flagged if (LANOUTAGESUPPORT && !LANOUTAGEACTIVE && LOCALNICDOWN) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 0a054f9c3..3cc094a0a 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -6244,7 +6244,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ string IPAddr = sysConfig->getConfig(msgPort, "IPAddr"); if ( IPAddr == oam::UnassignedIpAddr ) { - log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); return oam::API_SUCCESS; } @@ -6253,7 +6253,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ string cmd = cmdLine + IPAddr + cmdOption; if ( system(cmd.c_str()) != 0) { //ping failure - log.writeLog(__LINE__, "sendMsgProcMon 
ping failure", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); return oam::API_SUCCESS; } } diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 754d6ccf2..8b7b13165 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -6107,10 +6107,13 @@ int ProcessMonitor::glusterAssign(std::string dbrootID) command = "sudo mount -tglusterfs -odirect-io-mode=enable " + moduleIPAddr + ":/dbroot" + dbrootID + " " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/glusterAssign.txt 2>&1"; } + int ret = system(command.c_str()); if ( WEXITSTATUS(ret) != 0 ) { + log.writeLog(__LINE__, "glusterAssign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR); + ifstream in("/tmp/glusterAssign.txt"); in.seekg(0, std::ios::end); int size = in.tellg(); @@ -6151,9 +6154,13 @@ int ProcessMonitor::glusterUnassign(std::string dbrootID) { command = "sudo umount -f " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/glusterUnassign.txt 2>&1"; } + int ret = system(command.c_str()); + if ( WEXITSTATUS(ret) != 0 ) { + log.writeLog(__LINE__, "glusterUnassign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR); + ifstream in("/tmp/glusterUnassign.txt"); in.seekg(0, std::ios::end); int size = in.tellg(); From 8a4294978f35f8728690641bdf0c5bbe195e34c8 Mon Sep 17 00:00:00 2001 From: David Hill Date: Tue, 7 Aug 2018 08:54:08 -0500 Subject: [PATCH 105/123] MCOL-1605 - changed error to debug, alarms trying to get issued before procmgr is up --- oamapps/alarmmanager/alarmmanager.cpp | 8 ++++---- procmon/main.cpp | 9 +++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/oamapps/alarmmanager/alarmmanager.cpp b/oamapps/alarmmanager/alarmmanager.cpp index b9ba4f702..b9e8c20df 100644 --- a/oamapps/alarmmanager/alarmmanager.cpp +++ b/oamapps/alarmmanager/alarmmanager.cpp @@ 
-422,7 +422,7 @@ void ALARMManager::sendAlarmReport (const char* componentID, int alarmID, int st int pid = getpid(); int tid = gettid(); - // get reporting Pprocess Name + // get reporting Process Name string processName; if ( repProcessName.empty()) { // get current process name @@ -468,7 +468,7 @@ void ALARMManager::sendAlarmReport (const char* componentID, int alarmID, int st args.add("sendAlarmReport error:"); args.add(e.what()); msg.format(args); - ml.logErrorMessage(msg); + ml.logDebugMessage(msg); } catch (std::exception& e) { @@ -479,7 +479,7 @@ void ALARMManager::sendAlarmReport (const char* componentID, int alarmID, int st args.add("sendAlarmReport error:"); args.add(e.what()); msg.format(args); - ml.logErrorMessage(msg); + ml.logDebugMessage(msg); } catch (...) { @@ -490,7 +490,7 @@ void ALARMManager::sendAlarmReport (const char* componentID, int alarmID, int st args.add("sendAlarmReport error:"); args.add("general failure"); msg.format(args); - ml.logErrorMessage(msg); + ml.logDebugMessage(msg); } return; diff --git a/procmon/main.cpp b/procmon/main.cpp index 424944f6b..1c8ce19a9 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -308,8 +308,9 @@ int main(int argc, char **argv) if ( count >= 120 ) { log.writeLog(__LINE__, "Standby PM not responding, infinidb shutting down", LOG_TYPE_CRITICAL); //Set the alarm - aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); - sleep (1); + // aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); + // sleep (1); + string cmd = startup::StartUp::installDir() + "/bin/infinidb stop > /dev/null 2>&1"; system(cmd.c_str()); } @@ -493,8 +494,8 @@ int main(int argc, char **argv) { log.writeLog(__LINE__, "Check DB mounts failed, shutting down", LOG_TYPE_CRITICAL); //Set the alarm - aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); - sleep (1); + // aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); + 
// sleep (1); string cmd = startup::StartUp::installDir() + "/bin/columnstore stop > /dev/null 2>&1"; system(cmd.c_str()); } From 5927b1cf81d072300735266e7b9d16bfd6c57529 Mon Sep 17 00:00:00 2001 From: David Hill Date: Fri, 10 Aug 2018 16:01:12 -0500 Subject: [PATCH 106/123] MCOL-1523 - fix issue with query failing and enablemodule failing trying to get um1 back. --- oam/etc/ProcessConfig.xml | 4 ++-- procmgr/main.cpp | 24 ------------------------ procmgr/processmanager.cpp | 33 ++++++++++++++++----------------- 3 files changed, 18 insertions(+), 43 deletions(-) diff --git a/oam/etc/ProcessConfig.xml b/oam/etc/ProcessConfig.xml index 8a0c3618f..ca5d745f0 100644 --- a/oam/etc/ProcessConfig.xml +++ b/oam/etc/ProcessConfig.xml @@ -107,7 +107,7 @@ WriteEngineServer pm* DBRMWorkerNode - * + @ ExeMgr * SIMPLEX @@ -122,7 +122,7 @@ WriteEngineServer pm* DBRMWorkerNode - * + @ DDLProc @ SIMPLEX diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 995c851c2..55f675cec 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1523,9 +1523,6 @@ void pingDeviceThread() break; //set query system state not ready - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(false); - processManager.setQuerySystemState(false); processManager.setSystemState(oam::BUSY_INIT); @@ -1806,9 +1803,6 @@ void pingDeviceThread() } } - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); @@ -1865,9 +1859,6 @@ void pingDeviceThread() else processManager.setSystemState(oam::ACTIVE); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); @@ -1929,9 +1920,6 @@ void pingDeviceThread() log.writeLog(__LINE__, "module is down: " + moduleName, LOG_TYPE_CRITICAL); //set query system state not ready - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(false); - processManager.setQuerySystemState(false); processManager.setSystemState(oam::BUSY_INIT); @@ -1993,9 +1981,6 @@ void 
pingDeviceThread() oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); @@ -2201,9 +2186,6 @@ void pingDeviceThread() //set recycle process processManager.recycleProcess(moduleName); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); @@ -2220,9 +2202,6 @@ void pingDeviceThread() oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); } @@ -2236,9 +2215,6 @@ void pingDeviceThread() //set recycle process processManager.recycleProcess(moduleName); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); } diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 3cc094a0a..5f15f3446 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -829,8 +829,10 @@ void processMSG(messageqcpp::IOSocket* cfIos) if (opState == oam::MAN_OFFLINE || opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED || opState == oam::AUTO_OFFLINE) { - oam.dbrmctl("halt"); - log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); + processManager.setSystemState(oam::BUSY_INIT); + + //set query system state not ready + processManager.setQuerySystemState(false); status = processManager.disableModule(moduleName, true); log.writeLog(__LINE__, "Disable Module Completed on " + moduleName, LOG_TYPE_INFO); @@ -839,14 +841,12 @@ void processMSG(messageqcpp::IOSocket* cfIos) //check for SIMPLEX Processes on mate might need to be started processManager.checkSimplexModule(moduleName); + + processManager.setSystemState(oam::ACTIVE); + + //set query system state ready + 
processManager.setQuerySystemState(true); - //call dbrm control -// oam.dbrmctl("reload"); -// log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); } else { @@ -910,7 +910,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - //stopModules being removed with the REMOVE option, which will stop process + // do stopmodule then enable for( ; listPT != devicenetworklist.end() ; listPT++) { string moduleName = (*listPT).DeviceName; @@ -933,6 +933,9 @@ void processMSG(messageqcpp::IOSocket* cfIos) } if (opState == oam::MAN_DISABLED) { + processManager.stopModule(moduleName, graceful, manualFlag); + log.writeLog(__LINE__, "stop Module Completed on " + moduleName, LOG_TYPE_INFO); + status = processManager.enableModule(moduleName, oam::MAN_OFFLINE); log.writeLog(__LINE__, "Enable Module Completed on " + moduleName, LOG_TYPE_INFO); } @@ -2758,9 +2761,6 @@ void processMSG(messageqcpp::IOSocket* cfIos) log.writeLog(__LINE__, "MSG RECEIVED: Process Restarted on " + moduleName + "/" + processName); //set query system states not ready - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(false); - processManager.setQuerySystemState(false); processManager.setSystemState(oam::BUSY_INIT); @@ -2841,7 +2841,8 @@ void processMSG(messageqcpp::IOSocket* cfIos) break; sleep(1); } - dbrm.setSystemQueryReady(true); + processManager.setQuerySystemState(true); + } // if a DDLProc was restarted, reinit DMLProc @@ -2894,8 +2895,6 @@ void processMSG(messageqcpp::IOSocket* cfIos) } //enable query stats - dbrm.setSystemQueryReady(true); - processManager.setQuerySystemState(true); processManager.setSystemState(oam::ACTIVE); @@ -6489,7 +6488,7 @@ void ProcessManager::setQuerySystemState(bool set) log.writeLog(__LINE__, "setQuerySystemState = " + oam.itoa(set), LOG_TYPE_DEBUG); try { - dbrm.setSystemQueryReady(set); + 
dbrm.setSystemQueryReady(true); log.writeLog(__LINE__, "setQuerySystemState successful", LOG_TYPE_DEBUG); } catch(...) From b5a39ea78901dcd89078de5d8a674837c3b35dbc Mon Sep 17 00:00:00 2001 From: David Hill Date: Fri, 10 Aug 2018 16:15:58 -0500 Subject: [PATCH 107/123] MCOL-1523 - fix issue with query failing and enablemodule failing trying to get um1 back. --- procmgr/processmanager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 5f15f3446..a29502ae8 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -6488,7 +6488,7 @@ void ProcessManager::setQuerySystemState(bool set) log.writeLog(__LINE__, "setQuerySystemState = " + oam.itoa(set), LOG_TYPE_DEBUG); try { - dbrm.setSystemQueryReady(true); + dbrm.setSystemQueryReady(set); log.writeLog(__LINE__, "setQuerySystemState successful", LOG_TYPE_DEBUG); } catch(...) From e903e47201337bd49129bb68c9161e3caa510063 Mon Sep 17 00:00:00 2001 From: David Hill Date: Fri, 10 Aug 2018 18:37:40 -0500 Subject: [PATCH 108/123] MCOL-1523 - fix issue with query failing and enablemodule failing trying to get um1 back. 
--- procmgr/processmanager.cpp | 15 +++++++++++++-- procmon/main.cpp | 9 ++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index a29502ae8..a8258f19f 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -1249,6 +1249,9 @@ void processMSG(messageqcpp::IOSocket* cfIos) log.writeLog(__LINE__, "STOPSYSTEM: ACK back to sender"); } + //set query system state ready + processManager.setQuerySystemState(true); + startsystemthreadStop = false; break; @@ -2848,6 +2851,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) // if a DDLProc was restarted, reinit DMLProc if( processName == "DDLProc") { processManager.reinitProcessType("DMLProc"); + processManager.setQuerySystemState(true); } //only run on auto process restart @@ -2894,7 +2898,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) } } - //enable query stats + //set query system states ready processManager.setQuerySystemState(true); processManager.setSystemState(oam::ACTIVE); @@ -3773,6 +3777,7 @@ void ProcessManager::setSystemState(uint16_t state) Oam oam; ALARMManager aManager; Configuration config; + ProcessManager processManager(config, log); log.writeLog(__LINE__, "Set System State = " + oamState[state], LOG_TYPE_DEBUG); @@ -3793,6 +3798,9 @@ void ProcessManager::setSystemState(uint16_t state) // Process Alarms string system = "System"; if( state == oam::ACTIVE ) { + //set query system states ready + processManager.setQuerySystemState(true); + //clear alarms if set aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_AUTO, CLEAR); aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_MANUAL, CLEAR); @@ -6992,7 +7000,7 @@ void startSystemThread(oam::DeviceNetworkList Devicenetworklist) } //set query system state not ready - processManager.setQuerySystemState(true); + processManager.setQuerySystemState(false); // Bug 4554: Wait until DMLProc is finished with rollback if (status == oam::API_SUCCESS) @@ -7061,6 +7069,9 @@ 
void startSystemThread(oam::DeviceNetworkList Devicenetworklist) processManager.setSystemState(rtn); } + //set query system state ready + processManager.setQuerySystemState(true); + // exit thread log.writeLog(__LINE__, "startSystemThread Exit", LOG_TYPE_DEBUG); startsystemthreadStatus = status; diff --git a/procmon/main.cpp b/procmon/main.cpp index 1c8ce19a9..ad05a4f95 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -336,7 +336,7 @@ int main(int argc, char **argv) sysConfig->setConfig("ProcMgr_Alarm", "IPAddr", IPaddr); log.writeLog(__LINE__, "set ProcMgr IPaddr to Old Standby Module: " + IPaddr, LOG_TYPE_DEBUG); - //update Calpont Config table + //update MariaDB ColumnStore Config table try { sysConfig->write(); sleep(1); @@ -1333,7 +1333,7 @@ static void chldHandleThread(MonitorConfig config) (*listPtr).processID != 0 ) || ( (*listPtr).state == oam::ACTIVE && (*listPtr).processID == 0 ) ) { - log.writeLog(__LINE__, "*****Calpont Process Restarting: " + (*listPtr).ProcessName + ", old PID = " + oam.itoa((*listPtr).processID), LOG_TYPE_CRITICAL); + log.writeLog(__LINE__, "*****MariaDB ColumnStore Process Restarting: " + (*listPtr).ProcessName + ", old PID = " + oam.itoa((*listPtr).processID), LOG_TYPE_CRITICAL); if ( (*listPtr).dieCounter >= processRestartCount || processRestartCount == 0) { @@ -1530,7 +1530,7 @@ static void chldHandleThread(MonitorConfig config) } //Log this event - log.writeLog(__LINE__, "Calpont Process " + (*listPtr).ProcessName + restartStatus, LOG_TYPE_INFO); + log.writeLog(__LINE__, "MariaDB ColumnStore Process " + (*listPtr).ProcessName + restartStatus, LOG_TYPE_INFO); } } } @@ -2455,6 +2455,9 @@ void processStatusMSG(messageqcpp::IOSocket* cfIos) memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG); } + + BRM::DBRM dbrm; + dbrm.setSystemQueryReady(true); } } break; From 
5df447b6ec612cab24f8d62892d2d5aca0c762d0 Mon Sep 17 00:00:00 2001 From: Ravi Prakash Date: Tue, 14 Aug 2018 11:59:09 -0700 Subject: [PATCH 109/123] Fix MCOL-1635 where an "insert into table select query" crashes the server. This happens for a MEDIUMBLOB column type. --- dbcon/mysql/ha_calpont_dml.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dbcon/mysql/ha_calpont_dml.cpp b/dbcon/mysql/ha_calpont_dml.cpp index 9583d9f9e..c2dc36e47 100755 --- a/dbcon/mysql/ha_calpont_dml.cpp +++ b/dbcon/mysql/ha_calpont_dml.cpp @@ -1615,8 +1615,11 @@ int ha_calpont_impl_write_batch_row_(uchar *buf, TABLE* table, cal_impl_if::cal_ } else if (ci.columnTypes[colpos].colWidth < 16777216) { - dataLength = *(uint32_t*) buf; - buf = buf + 3 ; + dataLength = *(uint16_t*) buf; + buf = buf + 2 ; + if (*(uint8_t*)buf) + dataLength += 256*256*(*(uint8_t*)buf) ; + buf++; } else { From 52082ebfd515815ba5fd77ad1ab29a4d6e11f937 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Wed, 15 Aug 2018 08:30:02 +0100 Subject: [PATCH 110/123] MCOL-1653 Fix namespace issue Boost is taken out of global namespace to fix a conflict with MariaDB 10.3. This broke to_string in CentOS. Explicit namespace now used. 
--- dbcon/mysql/ha_calpont_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 62712f42a..8c790e8d4 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -4109,7 +4109,7 @@ void ha_calpont_impl_start_bulk_insert(ha_rows rows, TABLE* table) #ifdef _MSC_VER aCmdLine = aCmdLine + "/bin/cpimport.exe -N -P " + to_string(localModuleId) + " -s " + ci->delimiter + " -e 0" + " -E " + escapechar + ci->enclosed_by + " "; #else - aCmdLine = aCmdLine + "/bin/cpimport -m 1 -N -P " + to_string(localModuleId) + " -s " + ci->delimiter + " -e 0" + " -E " + escapechar + ci->enclosed_by + " "; + aCmdLine = aCmdLine + "/bin/cpimport -m 1 -N -P " + boost::to_string(localModuleId) + " -s " + ci->delimiter + " -e 0" + " -E " + escapechar + ci->enclosed_by + " "; #endif } } From 69486310f298ea3a7343aa64afe846d818c59327 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Thu, 16 Aug 2018 13:01:04 +0300 Subject: [PATCH 111/123] MCOL-1637 Return limit value comparison to fix the regression. 
--- dbcon/joblist/jlf_subquery.cpp | 4 ++-- dbcon/joblist/joblistfactory.cpp | 2 +- dbcon/mysql/ha_calpont_execplan.cpp | 5 +++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/dbcon/joblist/jlf_subquery.cpp b/dbcon/joblist/jlf_subquery.cpp index add3dc533..1e4eaeeec 100644 --- a/dbcon/joblist/jlf_subquery.cpp +++ b/dbcon/joblist/jlf_subquery.cpp @@ -756,8 +756,8 @@ int doFromSubquery(CalpontExecutionPlan* ep, const string& alias, const string& void addOrderByAndLimit(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo) { // make sure there is a LIMIT -// if (csep->orderByCols().size() > 0 csep->limitNum() == (uint64_t) - 1) -// return; + if (csep->orderByCols().size() > 0 && csep->limitNum() == (uint64_t) - 1) + return; jobInfo.limitStart = csep->limitStart(); jobInfo.limitCount = csep->limitNum(); diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 6fa0adbab..8c0615d83 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -1843,7 +1843,7 @@ void makeVtableModeSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobStepVector& querySteps, JobStepVector& projectSteps, DeliveredTableMap& deliverySteps) { // @bug4848, enhance and unify limit handling. 
-// if (csep->limitNum() != (uint64_t) - 1) + if (csep->limitNum() != (uint64_t) - 1) { // special case for outer query order by limit -- return all if (jobInfo.subId == 0 && csep->hasOrderBy()) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 59eecb631..f39ef1c96 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -10110,6 +10110,11 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro { csep->limitNum(((Item_int*)gi.groupByTables->select_lex->select_limit)->val_int()); } + else + { + if (csep->hasOrderBy()) + csep->limitNum((uint64_t) - 2); + } if (gi.groupByTables->select_lex->offset_limit) { From a98aec07fad9339b74f641c1a4644c1d4738c7cb Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Fri, 17 Aug 2018 01:10:00 +0300 Subject: [PATCH 112/123] MCOL-1655 removed hardcoded %debug from ddl.y. --- dbcon/ddlpackage/CMakeLists.txt | 4 ++-- dbcon/ddlpackage/ddl.l | 3 +++ dbcon/ddlpackage/ddl.y | 1 - 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/dbcon/ddlpackage/CMakeLists.txt b/dbcon/ddlpackage/CMakeLists.txt index ae2f82fa9..82b7ba756 100644 --- a/dbcon/ddlpackage/CMakeLists.txt +++ b/dbcon/ddlpackage/CMakeLists.txt @@ -1,4 +1,3 @@ - include_directories( ${ENGINE_COMMON_INCLUDES} ) ADD_CUSTOM_COMMAND( @@ -9,9 +8,10 @@ ADD_CUSTOM_COMMAND( DEPENDS ddl.y ddl.l ) + # Parser puts extra info to stderr. 
INCLUDE(../../check_compiler_flag.cmake) -MY_CHECK_AND_SET_COMPILER_FLAG("-DYYDEBUG" DEBUG) +MY_CHECK_AND_SET_COMPILER_FLAG("-DYYDEBUG=1" DEBUG) ########### next target ############### diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index 6eeaafb0b..7f9362cee 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -33,6 +33,9 @@ using namespace ddlpackage; typedef enum { NOOP, STRIP_QUOTES } copy_action_t; +#if YYDEBUG == 0 +int ddldebug = 0; +#endif int lineno = 1; void ddlerror(struct pass_to_bison* x, char const *s); diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index 96867cfb8..2556b8340 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -63,7 +63,6 @@ char* copy_string(const char *str); %pure-parser %lex-param {void * scanner} %parse-param {struct ddlpackage::pass_to_bison * x} -%debug /* Bison uses this to generate a C union definition. This is used to store the application created values associated with syntactic From e5891e4ddc62b6ba4583ca34ec8e02e18509449d Mon Sep 17 00:00:00 2001 From: Ravi Prakash Date: Thu, 16 Aug 2018 17:26:53 -0700 Subject: [PATCH 113/123] Fix MCOL-1577 ColumnStore to allow CREATE TABLE table_name LIKE Syntax The code walks the source table meta-data structure to generate CREATE table statement which creates the new table. 
--- dbcon/ddlpackage/ddl.y | 7 +- dbcon/mysql/ha_calpont_ddl.cpp | 164 +++++++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+), 1 deletion(-) diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index 4242fe93b..7c2c58efe 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -716,11 +716,16 @@ default_clause: } | DEFAULT NULL_TOK {$$ = new ColumnDefaultValue(NULL);} | DEFAULT USER {$$ = new ColumnDefaultValue("$USER");} - | DEFAULT CURRENT_USER {$$ = new ColumnDefaultValue("$CURRENT_USER");} + | DEFAULT CURRENT_USER optional_braces {$$ = new ColumnDefaultValue("$CURRENT_USER");} | DEFAULT SESSION_USER {$$ = new ColumnDefaultValue("$SESSION_USER");} | DEFAULT SYSTEM_USER {$$ = new ColumnDefaultValue("$SYSTEM_USER");} ; +optional_braces: + /* empty */ {} + | '(' ')' {} + ; + data_type: character_string_type | binary_string_type diff --git a/dbcon/mysql/ha_calpont_ddl.cpp b/dbcon/mysql/ha_calpont_ddl.cpp index 2d7c52563..0aebfaaee 100644 --- a/dbcon/mysql/ha_calpont_ddl.cpp +++ b/dbcon/mysql/ha_calpont_ddl.cpp @@ -1912,6 +1912,79 @@ pair parseTableName(const string& tn) } +// +// get_field_default_value: Returns the default value as a string value +// NOTE: This is duplicated code copied from show.cc and a MDEV-17006 has +// been created. 
+// + +static bool get_field_default_value(THD *thd, Field *field, String *def_value, + bool quoted) +{ + bool has_default; + enum enum_field_types field_type= field->type(); + + has_default= (field->default_value || + (!(field->flags & NO_DEFAULT_VALUE_FLAG) && + field->unireg_check != Field::NEXT_NUMBER)); + + def_value->length(0); + if (has_default) + { + StringBuffer str(field->charset()); + if (field->default_value) + { + field->default_value->print(&str); + if (field->default_value->expr->need_parentheses_in_default()) + { + def_value->set_charset(&my_charset_utf8mb4_general_ci); + def_value->append('('); + def_value->append(str); + def_value->append(')'); + } + else + def_value->append(str); + } + else if (!field->is_null()) + { // Not null by default + if (field_type == MYSQL_TYPE_BIT) + { + str.qs_append('b'); + str.qs_append('\''); + str.qs_append(field->val_int(), 2); + str.qs_append('\''); + quoted= 0; + } + else + { + field->val_str(&str); + if (!field->str_needs_quotes()) + quoted= 0; + } + if (str.length()) + { + StringBuffer def_val; + uint dummy_errors; + /* convert to system_charset_info == utf8 */ + def_val.copy(str.ptr(), str.length(), field->charset(), + system_charset_info, &dummy_errors); + if (quoted) + append_unescaped(def_value, def_val.ptr(), def_val.length()); + else + def_value->append(def_val); + } + else if (quoted) + def_value->set(STRING_WITH_LEN("''"), system_charset_info); + } + else if (field->maybe_null() && quoted) + def_value->set(STRING_WITH_LEN("NULL"), system_charset_info); // Null as default + else + return 0; + + } + return has_default; +} + int ha_calpont_impl_create_(const char* name, TABLE* table_arg, HA_CREATE_INFO* create_info, cal_connection_info& ci) { #ifdef INFINIDB_DEBUG @@ -2096,6 +2169,97 @@ int ha_calpont_impl_create_(const char* name, TABLE* table_arg, HA_CREATE_INFO* return 1; } + // + // Check if this is a "CREATE TABLE ... LIKE " statement. 
+ // If so generate a full create table statement using the properties of + // the source table. Note that source table has to be a columnstore table and + // we only check for currently supported options. + // + + if (thd->lex->create_info.like()) + { + TABLE_SHARE *share = table_arg->s; + my_bitmap_map *old_map; // To save the read_set + char datatype_buf[MAX_FIELD_WIDTH], def_value_buf[MAX_FIELD_WIDTH]; + String datatype, def_value; + ostringstream oss; + string tbl_name (name+2); + std::replace(tbl_name.begin(), tbl_name.end(), '/', '.'); + + // Save the current read_set map and mark it for read + old_map= tmp_use_all_columns(table_arg, table_arg->read_set); + + oss << "CREATE TABLE " << tbl_name << " ("; + + restore_record(table_arg, s->default_values); + for (Field **field= table_arg->field; *field; field++) + { + uint flags = (*field)->flags; + datatype.set(datatype_buf, sizeof(datatype_buf), system_charset_info); + (*field)->sql_type(datatype); + if (field != table_arg->field) + oss << ", "; + oss << (*field)->field_name.str << " " << datatype.ptr(); + + if (flags & NOT_NULL_FLAG) + oss << " NOT NULL"; + + def_value.set(def_value_buf, sizeof(def_value_buf), system_charset_info); + if (get_field_default_value(thd, *field, &def_value, true)) { + oss << " DEFAULT " << def_value.c_ptr(); + } + if ((*field)->comment.length) + { + String comment; + append_unescaped(&comment, (*field)->comment.str, (*field)->comment.length); + oss << " COMMENT "; + oss << comment.c_ptr(); + } + + } + // End the list of columns + oss<< ") ENGINE=columnstore "; + + // Process table level options + + if (create_info->auto_increment_value > 1) + { + oss << " AUTO_INCREMENT=" << create_info->auto_increment_value; + } + + if (share->table_charset) + { + oss << " DEFAULT CHARSET=" << share->table_charset->csname; + } + + // Process table level options such as MIN_ROWS, MAX_ROWS, COMMENT + + if (share->min_rows) + { + char buff[80]; + longlong10_to_str(share->min_rows, buff, 10); + oss << 
" MIN_ROWS=" << buff; + } + + if (share->max_rows) { + char buff[80]; + longlong10_to_str(share->max_rows, buff, 10); + oss << " MAX_ROWS=" << buff; + } + + if (share->comment.length) { + String comment; + append_unescaped(&comment, share->comment.str, share->comment.length); + oss << " COMMENT "; + oss << comment.c_ptr(); + } + + oss << ";"; + stmt = oss.str(); + + tmp_restore_column_map(table_arg->read_set, old_map); + } + rc = ProcessDDLStatement(stmt, db, tbl, tid2sid(thd->thread_id), emsg, compressiontype, isAnyAutoincreCol, startValue, columnName); if (rc != 0) From 580a3ec123da009752a0c80a5875964238b08557 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 17 Aug 2018 07:55:51 +0100 Subject: [PATCH 114/123] MCOL-1647 Fix TIME regressions Fixes the following: * Read past buffer end in intToDatetime / intToTime * Allow intToTime to convert datetime * Allow intToTime to convert shortened time values * Allow stringToTime to convert datetime and int time values * Fix saturation / bad values in intToTime and stringToTime * Fix TIME return in STR_TO_DATE() * Fix NULL return on type inequality for TIMEDIFF() * Fix zero day calculation error in ADDTIME()/SUBTIME() * Fix DATETIME to int calculation error in aggregate bit operations * Make the new harderning flags optional with -DSECURITY_HARDENED_NEW --- CMakeLists.txt | 11 +-- utils/dataconvert/dataconvert.cpp | 108 +++++++++++++++++++++++------ utils/dataconvert/dataconvert.h | 2 +- utils/funcexp/func_str_to_date.cpp | 18 +++++ utils/funcexp/func_timediff.cpp | 6 -- utils/funcexp/functor.cpp | 2 +- utils/funcexp/functor_dtm.h | 4 ++ utils/rowgroup/rowaggregation.cpp | 10 +-- 8 files changed, 122 insertions(+), 39 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 30eb38f6c..adee980ba 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -122,6 +122,7 @@ ELSE() SET(security_default ON) ENDIF() OPTION(SECURITY_HARDENED "Use security-enhancing compiler features (stack protector, relro, etc)" 
${security_default}) +OPTION(SECURITY_HARDENED_NEW "Use new security-enhancing compilier features" OFF) IF(SECURITY_HARDENED) # security-enhancing flags MY_CHECK_AND_SET_COMPILER_FLAG("-pie -fPIC") @@ -129,12 +130,14 @@ IF(SECURITY_HARDENED) MY_CHECK_AND_SET_COMPILER_FLAG("-fstack-protector --param=ssp-buffer-size=4") MY_CHECK_AND_SET_COMPILER_FLAG("-D_FORTIFY_SOURCE=2" RELEASE RELWITHDEBINFO) MY_CHECK_AND_SET_COMPILER_FLAG("-fexceptions") - MY_CHECK_AND_SET_COMPILER_FLAG("-mcet -fcf-protection") - MY_CHECK_AND_SET_COMPILER_FLAG("-fstack-protector-strong") - MY_CHECK_AND_SET_COMPILER_FLAG("-fstack-clash-protection") + IF(SECURITY_HARDENED_NEW) + MY_CHECK_AND_SET_COMPILER_FLAG("-mcet -fcf-protection") + MY_CHECK_AND_SET_COMPILER_FLAG("-fstack-protector-strong") + MY_CHECK_AND_SET_COMPILER_FLAG("-fstack-clash-protection") + ENDIF() ENDIF() -SET (ENGINE_LDFLAGS "-Wl,--no-as-needed -Wl,--add-needed") +SET (ENGINE_LDFLAGS "-Wl,--no-as-needed -Wl,--add-needed") FIND_PACKAGE(Boost 1.53.0 REQUIRED COMPONENTS system filesystem thread regex date_time) diff --git a/utils/dataconvert/dataconvert.cpp b/utils/dataconvert/dataconvert.cpp index c4dbbc728..832143c5b 100644 --- a/utils/dataconvert/dataconvert.cpp +++ b/utils/dataconvert/dataconvert.cpp @@ -2566,7 +2566,6 @@ int64_t DataConvert::intToDatetime(int64_t data, bool* date) hour = string(buf + 8, 2); min = string(buf + 10, 2); sec = string(buf + 12, 2); - msec = string(buf + 14, 6); break; case 12: @@ -2576,7 +2575,6 @@ int64_t DataConvert::intToDatetime(int64_t data, bool* date) hour = string(buf + 6, 2); min = string(buf + 8, 2); sec = string(buf + 10, 2); - msec = string(buf + 12, 6); break; case 10: @@ -2585,7 +2583,6 @@ int64_t DataConvert::intToDatetime(int64_t data, bool* date) hour = string(buf + 4, 2); min = string(buf + 6, 2); sec = string(buf + 8, 2); - msec = string(buf + 10, 6); break; case 9: @@ -2594,7 +2591,6 @@ int64_t DataConvert::intToDatetime(int64_t data, bool* date) hour = string(buf + 3, 2); min = 
string(buf + 5, 2); sec = string(buf + 7, 2); - msec = string(buf + 9, 6); break; case 8: @@ -2645,7 +2641,7 @@ int64_t DataConvert::intToDatetime(int64_t data, bool* date) h = atoi(hour.c_str()); minute = atoi(min.c_str()); s = atoi(sec.c_str()); - ms = atoi(msec.c_str()); + ms = 0; if (!isDateValid(d, m, y) || !isDateTimeValid(h, minute, s, ms)) return -1; @@ -2664,7 +2660,7 @@ int64_t DataConvert::intToDatetime(int64_t data, bool* date) return *(reinterpret_cast(&adaytime)); } -int64_t DataConvert::intToTime(int64_t data) +int64_t DataConvert::intToTime(int64_t data, bool fromString) { char buf[21] = {0}; char* bufread = buf; @@ -2693,43 +2689,78 @@ int64_t DataConvert::intToTime(int64_t data) bufread++; } + bool zero = false; + switch (strlen(bufread)) { + // A full datetime + case 14: + hour = string(buf + 8, 2); + min = string(buf + 10, 2); + sec = string(buf + 12, 2); + break; + + // Date so this is all 0 + case 8: + zero = true; + break; + case 7: hour = string(bufread, 3); min = string(bufread + 2, 2); sec = string(bufread + 4, 2); - msec = string(bufread + 6, 6); break; case 6: hour = string(bufread, 2); min = string(bufread + 2, 2); sec = string(bufread + 4, 2); - msec = string(bufread + 6, 6); + break; + + case 5: + hour = string(bufread, 1); + min = string(bufread + 1, 2); + sec = string(bufread + 3, 2); break; case 4: min = string(bufread, 2); sec = string(bufread + 2, 2); - msec = string(bufread + 4, 6); + break; + + case 3: + min = string(bufread, 1); + sec = string(bufread + 1, 2); break; case 2: sec = string(bufread, 2); - msec = string(bufread + 2, 6); + break; + + case 1: + sec = string(bufread, 1); break; default: return -1; } - h = atoi(hour.c_str()); - minute = atoi(min.c_str()); - s = atoi(sec.c_str()); - ms = atoi(msec.c_str()); + if (!zero) + { + h = atoi(hour.c_str()); + minute = atoi(min.c_str()); + s = atoi(sec.c_str()); + } + else if (fromString) + { + // Saturate fromString + h = 838; + minute = 59; + s = 59; + ms = 999999; + } - if 
(!isTimeValid(h, minute, s, ms)) + if (!isTimeValid(h, minute, s, 0)) return -1; atime.hour = h; @@ -2749,6 +2780,7 @@ int64_t DataConvert::stringToTime(const string& data) uint64_t min = 0, sec = 0, msec = 0; int64_t day = -1, hour = 0; bool isNeg = false; + bool hasDate = false; string time, hms, ms; char* end = NULL; @@ -2760,18 +2792,27 @@ int64_t DataConvert::stringToTime(const string& data) isNeg = true; } + if (data.substr(pos+1, data.length()-pos-1).find("-") != string::npos) + { + // A second dash, this has a date + hasDate = true; + isNeg = false; + } // Day pos = data.find(" "); if (pos != string::npos) { - day = strtol(data.substr(0, pos).c_str(), &end, 10); + if (!hasDate) + { + day = strtol(data.substr(0, pos).c_str(), &end, 10); - if (*end != '\0') - return -1; + if (*end != '\0') + return -1; - hour = day * 24; - day = -1; + hour = day * 24; + day = -1; + } time = data.substr(pos + 1, data.length() - pos - 1); } else @@ -2779,6 +2820,22 @@ int64_t DataConvert::stringToTime(const string& data) time = data; } + if (time.find(":") == string::npos) + { + if (hasDate) + { + // Has dashes, no colons. This is just a date! 
+ // Or the length < 6 (MariaDB returns NULL) + return -1; + } + else + { + // This is an int time + return intToTime(atoll(time.c_str()), true); + } + } + + // Fraction pos = time.find("."); @@ -2797,11 +2854,18 @@ int64_t DataConvert::stringToTime(const string& data) if (pos == string::npos) { - hour += atoi(hms.c_str()); + if (hour >= 0) + hour += atoi(hms.c_str()); + else + hour -= atoi(hms.c_str()); } else { - hour += atoi(hms.substr(0, pos).c_str()); + if (hour >= 0) + hour += atoi(hms.substr(0, pos).c_str()); + else + hour -= atoi(hms.substr(0, pos).c_str()); + ms = hms.substr(pos + 1, hms.length() - pos - 1); } diff --git a/utils/dataconvert/dataconvert.h b/utils/dataconvert/dataconvert.h index c01f261b6..a6ce20198 100644 --- a/utils/dataconvert/dataconvert.h +++ b/utils/dataconvert/dataconvert.h @@ -541,7 +541,7 @@ public: // convert integer to datetime EXPORT static int64_t intToDatetime(int64_t data, bool* isDate = NULL); // convert integer to date - EXPORT static int64_t intToTime(int64_t data); + EXPORT static int64_t intToTime(int64_t data, bool fromString = false); // convert string to date. alias to stringToDate EXPORT static int64_t dateToInt(const std::string& date); // convert string to datetime. 
alias to datetimeToInt diff --git a/utils/funcexp/func_str_to_date.cpp b/utils/funcexp/func_str_to_date.cpp index 42d43cb6b..31bbbf6ca 100644 --- a/utils/funcexp/func_str_to_date.cpp +++ b/utils/funcexp/func_str_to_date.cpp @@ -198,6 +198,24 @@ int64_t Func_str_to_date::getDatetimeIntVal(rowgroup::Row& row, return time; } +int64_t Func_str_to_date::getTimeIntVal(rowgroup::Row& row, + FunctionParm& parm, + bool& isNull, + CalpontSystemCatalog::ColType& ct) +{ + dataconvert::DateTime dateTime; + dataconvert::Time retTime; + dateTime = getDateTime(row, parm, isNull, ct); + retTime.day = 0; + retTime.is_neg = false; + retTime.hour = dateTime.hour; + retTime.minute = dateTime.minute; + retTime.second = dateTime.second; + retTime.msecond = dateTime.msecond; + int64_t time = *(reinterpret_cast(&retTime)); + return time; +} + int64_t Func_str_to_date::getIntVal(rowgroup::Row& row, FunctionParm& parm, bool& isNull, diff --git a/utils/funcexp/func_timediff.cpp b/utils/funcexp/func_timediff.cpp index d17511f76..742e8faf7 100644 --- a/utils/funcexp/func_timediff.cpp +++ b/utils/funcexp/func_timediff.cpp @@ -109,12 +109,6 @@ string Func_timediff::getStrVal(rowgroup::Row& row, int64_t val1 = -1, val2 = -1; bool isDate1 = false, isDate2 = false; - if (type1 != type2) - { - isNull = true; - return ""; - } - switch (type1) { case execplan::CalpontSystemCatalog::DATE: diff --git a/utils/funcexp/functor.cpp b/utils/funcexp/functor.cpp index f722dd557..1e50ea1fc 100644 --- a/utils/funcexp/functor.cpp +++ b/utils/funcexp/functor.cpp @@ -228,7 +228,7 @@ int64_t Func::addTime(DateTime& dt1, Time& dt2) month = dt1.month; int addyear = 0; - if (day < 0) + if (day <= 0) { int monthSave = month; diff --git a/utils/funcexp/functor_dtm.h b/utils/funcexp/functor_dtm.h index d7837a4fe..bcff47854 100644 --- a/utils/funcexp/functor_dtm.h +++ b/utils/funcexp/functor_dtm.h @@ -473,6 +473,10 @@ public: FunctionParm& fp, bool& isNull, execplan::CalpontSystemCatalog::ColType& op_ct); + int64_t 
getTimeIntVal(rowgroup::Row& row, + FunctionParm& fp, + bool& isNull, + execplan::CalpontSystemCatalog::ColType& op_ct); }; diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 1a28de089..f9db8b266 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -1547,9 +1547,9 @@ void RowAggregation::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut, in case execplan::CalpontSystemCatalog::DATETIME: { uint64_t dtm = rowIn.getUintField(colIn); - valIn = ((dtm >> 48) * 10000000000000000LL) + (((dtm >> 44) & 0xF) * 100000000000000) + - (((dtm >> 38) & 077) * 1000000000000) + (((dtm >> 32) & 077) * 10000000000) + - (((dtm >> 26) & 077) * 100000000) + (((dtm >> 20) & 077) * 1000000) + (dtm & 0xfffff); + valIn = ((dtm >> 48) * 10000000000LL) + (((dtm >> 44) & 0xF) * 100000000) + + (((dtm >> 38) & 077) * 1000000) + (((dtm >> 32) & 077) * 10000) + + (((dtm >> 26) & 077) * 100) + ((dtm >> 20) & 077); break; } @@ -1565,8 +1565,8 @@ void RowAggregation::doBitOp(const Row& rowIn, int64_t colIn, int64_t colOut, in } hour |= ((dtm >> 40) & 0xfff); - valIn = (hour * 10000000000) + - (((dtm >> 32) & 0xff) * 100000000) + (((dtm >> 24) & 0xff) * 1000000) + (dtm & 0xffffff); + valIn = (hour * 10000) + + (((dtm >> 32) & 0xff) * 100) + ((dtm >> 24) & 0xff); break; } From 91fbfb7d0be6746f143dbea77b52c68bd03d9914 Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 23 Aug 2018 15:14:17 -0500 Subject: [PATCH 115/123] MCOL-1669 get correct return type when a built-in agg is performed on a const --- dbcon/mysql/ha_calpont_execplan.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index d104f6e32..3fe05227d 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4145,7 +4145,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // MCOL-1201 For UDAnF multiple parameters 
vector selCols; vector orderCols; - + bool bIsConst = false; if (!(gwi.thd->infinidb_vtable.cal_conn_info)) gwi.thd->infinidb_vtable.cal_conn_info = (void*)(new cal_connection_info()); @@ -4324,6 +4324,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) parm.reset(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError)); ac->constCol(parm); + bIsConst = true; break; } @@ -4440,7 +4441,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // Get result type // Modified for MCOL-1201 multi-argument aggregate - if (ac->aggParms().size() > 0) + if (!bIsConst && ac->aggParms().size() > 0) { // These are all one parm functions, so we can safely // use the first parm for result type. From 62f296b4437df0ea27528a4c5c3e9435513109f9 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Thu, 30 Aug 2018 16:59:41 +0300 Subject: [PATCH 116/123] MCOL-1510 Make changes to allow aggregations as function arguments when built in release mode. See also MDEV-16842. --- dbcon/mysql/ha_calpont.cpp | 13 --- dbcon/mysql/ha_calpont.h | 1 - dbcon/mysql/ha_calpont_execplan.cpp | 118 ++++++++++++++++------------ dbcon/mysql/ha_calpont_impl.cpp | 1 - dbcon/mysql/ha_calpont_impl_if.h | 5 +- 5 files changed, 68 insertions(+), 70 deletions(-) diff --git a/dbcon/mysql/ha_calpont.cpp b/dbcon/mysql/ha_calpont.cpp index 6167054c5..8e906dd42 100644 --- a/dbcon/mysql/ha_calpont.cpp +++ b/dbcon/mysql/ha_calpont.cpp @@ -1187,18 +1187,6 @@ ha_calpont_group_by_handler::ha_calpont_group_by_handler(THD* thd_arg, Query* qu order_by(query->order_by), having(query->having) { - List_iterator_fast item_iter(*select); - Item* item; - char* str = NULL; - while((item = item_iter++)) - { - String descr; - item->print(&descr, QT_ORDINARY); - str = new char[descr.length()+1]; - strncpy(str, descr.ptr(), descr.length()); - str[descr.length()] = '\0'; - select_list_descr.push_back(str); - } } /*********************************************************** @@ -1207,7 +1195,6 @@ 
ha_calpont_group_by_handler::ha_calpont_group_by_handler(THD* thd_arg, Query* qu ***********************************************************/ ha_calpont_group_by_handler::~ha_calpont_group_by_handler() { - select_list_descr.delete_elements(); } /*********************************************************** diff --git a/dbcon/mysql/ha_calpont.h b/dbcon/mysql/ha_calpont.h index 3c6f7e49e..e618ed4f0 100644 --- a/dbcon/mysql/ha_calpont.h +++ b/dbcon/mysql/ha_calpont.h @@ -286,7 +286,6 @@ public: int end_scan(); List* select; - List select_list_descr; TABLE_LIST* table_list; bool distinct; Item* where; diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 3fe05227d..da0fdbced 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -190,21 +190,57 @@ bool nonConstFunc(Item_func* ifp) return false; } -ReturnedColumn* findCorrespTempField(Item_ref* item, gp_walk_info& gwi, bool clone = true) +/*@brief buildAggFrmTempField- build aggr func from extSELECT list item*/ +/*********************************************************** + * DESCRIPTION: + * Server adds additional aggregation items to extended SELECT list and + * references them in projection and HAVING. This f() finds + * corresponding item in extSelAggColsItems and builds + * ReturnedColumn using the item. 
+ * PARAMETERS: + * item Item* used to build aggregation + * gwi main structure + * RETURNS + * ReturnedColumn* if corresponding Item has been found + * NULL otherwise + ***********************************************************/ +ReturnedColumn* buildAggFrmTempField(Item* item, gp_walk_info& gwi) { ReturnedColumn* result = NULL; - uint32_t i; - for (i = 0; i < gwi.returnedCols.size(); i++) + Item_field* ifip = NULL; + Item_ref* irip; + Item_func_or_sum* isfp; + + switch ( item->type() ) { - if (item->ref[0] && item->ref[0]->name.length && - gwi.returnedCols[i]->alias().c_str() && - !strcasecmp(item->ref[0]->name.str, gwi.returnedCols[i]->alias().c_str())) - { - if (clone) - result = gwi.returnedCols[i]->clone(); - else - result = gwi.returnedCols[i].get(); + case Item::FIELD_ITEM: + ifip = reinterpret_cast(item); break; + default: + irip = reinterpret_cast(item); + if ( irip ) + ifip = reinterpret_cast(irip->ref[0]); + break; + } + + if (ifip && ifip->field) + { + std::vector::iterator iter = gwi.extSelAggColsItems.begin(); + for ( ; iter != gwi.extSelAggColsItems.end(); iter++ ) + { + //Item* temp_isfp = *iter; + isfp = reinterpret_cast(*iter); + + if ( isfp->type() == Item::SUM_FUNC_ITEM && + isfp->result_field == ifip->field ) + { + ReturnedColumn* rc = buildAggregateColumn(isfp, gwi); + + if (rc) + result = rc; + + break; + } } } @@ -3101,7 +3137,10 @@ ArithmeticColumn* buildArithmeticColumn( { // There must be an aggregation column in extended SELECT // list so find the corresponding column. - ReturnedColumn* rc = findCorrespTempField(static_cast(sfitempp[0]), gwi); + // Could have it set if there are aggregation funcs as this function arguments. + gwi.fatalParseError = false; + + ReturnedColumn* rc = buildAggFrmTempField(sfitempp[0], gwi); if(rc) lhs = new ParseTree(rc); } @@ -3117,7 +3156,10 @@ ArithmeticColumn* buildArithmeticColumn( { // There must be an aggregation column in extended SELECT // list so find the corresponding column. 
- ReturnedColumn* rc = findCorrespTempField(static_cast(sfitempp[1]), gwi); + // Could have it set if there are aggregation funcs as this function arguments. + gwi.fatalParseError = false; + + ReturnedColumn* rc = buildAggFrmTempField(sfitempp[1], gwi); if(rc) rhs = new ParseTree(rc); } @@ -3456,10 +3498,11 @@ ReturnedColumn* buildFunctionColumn( ReturnedColumn* rc = buildReturnedColumn(ifp->arguments()[i], gwi, nonSupport, pushdownHand); // MCOL-1510 It must be a temp table field, so find the corresponding column. - if (pushdownHand + if (!rc && pushdownHand && ifp->arguments()[i]->type() == Item::REF_ITEM) { - rc = findCorrespTempField(static_cast(ifp->arguments()[i]), gwi); + gwi.fatalParseError = false; + rc = buildAggFrmTempField(ifp->arguments()[i], gwi); } if (!rc || nonSupport) @@ -5344,26 +5387,9 @@ void gp_walk(const Item* item, void* arg) } else if (col->type() == Item::FIELD_ITEM && gwip->clauseType == HAVING) { - Item_field* ifip = static_cast(col); - std::vector::iterator iter = gwip->havingAggColsItems.begin(); - Item_func_or_sum* isfp = NULL; - - for ( ; iter != gwip->havingAggColsItems.end(); iter++ ) - { - Item* temp_isfp = *iter; - isfp = reinterpret_cast(temp_isfp); - - if ( isfp->type() == Item::SUM_FUNC_ITEM && - isfp->result_field == ifip->field ) - { - ReturnedColumn* rc = buildAggregateColumn(isfp, *gwip); - - if (rc) - gwip->rcWorkStack.push(rc); - - break; - } - } + ReturnedColumn* rc = buildAggFrmTempField(const_cast(item), *gwip); + if (rc) + gwip->rcWorkStack.push(rc); break; } @@ -5628,7 +5654,7 @@ void parse_item (Item* item, vector& field_vec, // and set hasNonSupportItem if it is so. 
ReturnedColumn* rc = NULL; if (gwi) - rc = findCorrespTempField(ref, *gwi, false); + rc = buildAggFrmTempField(ref, *gwi); if (!rc) { @@ -8238,12 +8264,13 @@ int cp_get_group_plan(THD* thd, SCSEP& csep, cal_impl_if::cal_group_info& gi) SELECT_LEX select_lex = lex->select_lex; gp_walk_info gwi; gwi.thd = thd; - gwi.groupByAuxDescr = gi.groupByAuxDescr; int status = getGroupPlan(gwi, select_lex, csep, gi); +#ifdef DEBUG_WALK_COND cerr << "---------------- cp_get_group_plan EXECUTION PLAN ----------------" << endl; cerr << *csep << endl ; cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; +#endif if (status > 0) return ER_INTERNAL_ERROR; @@ -8620,8 +8647,6 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro string sel_cols_in_create; string sel_cols_in_select; bool redo = false; - List_iterator_fast itDescr(*gi.groupByAuxDescr); - char* fieldDescr; // empty rcWorkStack and ptWorkStack. They should all be empty by now. clearStacks(gwi); @@ -8640,14 +8665,12 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro while ((item = it++)) { - // Given the size of gi.groupByAuxDescr is equal to gi.groupByFields - fieldDescr = itDescr++; string itemAlias; if(item->name.length) itemAlias = (item->name.str); else { - itemAlias = (fieldDescr ? fieldDescr: ""); + itemAlias = ""; } // @bug 5916. Need to keep checking until getting concret item in case @@ -8754,18 +8777,11 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro return ER_CHECK_NOT_IMPLEMENTED; } - if(!ac->alias().length()) - ac->alias(fieldDescr); // add this agg col to returnedColumnList boost::shared_ptr spac(ac); gwi.returnedCols.push_back(spac); - // This item will be used in HAVING later. - Item_func_or_sum* isfp = reinterpret_cast(item); - - if ( ! isfp->name.length ) - { - gwi.havingAggColsItems.push_back(item); - } + // This item could be used in projection or HAVING later. 
+ gwi.extSelAggColsItems.push_back(item); gwi.selectCols.push_back('`' + escapeBackTick(spac->alias().c_str()) + '`'); String str(256); diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index c2ff80950..59a3df714 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -5265,7 +5265,6 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE // MCOL-1052 Send Items lists down to the optimizer. gi.groupByTables = group_hand->table_list; gi.groupByFields = group_hand->select; - gi.groupByAuxDescr = &group_hand->select_list_descr; gi.groupByWhere = group_hand->where; gi.groupByGroup = group_hand->group_by; gi.groupByOrder = group_hand->order_by; diff --git a/dbcon/mysql/ha_calpont_impl_if.h b/dbcon/mysql/ha_calpont_impl_if.h index 77b30d988..4ebc7adb2 100644 --- a/dbcon/mysql/ha_calpont_impl_if.h +++ b/dbcon/mysql/ha_calpont_impl_if.h @@ -99,7 +99,7 @@ struct gp_walk_info execplan::CalpontSelectExecutionPlan::ReturnedColumnList groupByCols; execplan::CalpontSelectExecutionPlan::ReturnedColumnList subGroupByCols; execplan::CalpontSelectExecutionPlan::ReturnedColumnList orderByCols; - std::vector havingAggColsItems; + std::vector extSelAggColsItems; execplan::CalpontSelectExecutionPlan::ColumnMap columnMap; // This vector temporarily hold the projection columns to be added // to the returnedCols vector for subquery processing. 
It will be appended @@ -142,7 +142,6 @@ struct gp_walk_info std::map derivedTbFilterMap; uint32_t derivedTbCnt; std::vector subselectList; - List* groupByAuxDescr; // Kludge for Bug 750 int32_t recursionLevel; @@ -200,7 +199,6 @@ struct cal_table_info struct cal_group_info { cal_group_info() : groupByFields(0), - groupByAuxDescr(0), groupByTables(0), groupByWhere(0), groupByGroup(0), @@ -211,7 +209,6 @@ struct cal_group_info ~cal_group_info() { } List* groupByFields; // MCOL-1052 SELECT - List* groupByAuxDescr; //MCOL-1052 Auxilary column descriptions TABLE_LIST* groupByTables; // MCOL-1052 FROM Item* groupByWhere; // MCOL-1052 WHERE ORDER* groupByGroup; // MCOL-1052 GROUP BY From 07561c43d7563f9f197d8973c8486d6425c9bfe6 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Thu, 30 Aug 2018 17:03:14 +0300 Subject: [PATCH 117/123] MCOL-1052 LIMIT processing refactoring in getGroupPlan(). --- dbcon/execplan/calpontselectexecutionplan.h | 12 ++++++++++++ dbcon/joblist/joblistfactory.cpp | 5 +++-- dbcon/mysql/ha_calpont_execplan.cpp | 19 +++++++++++-------- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/dbcon/execplan/calpontselectexecutionplan.h b/dbcon/execplan/calpontselectexecutionplan.h index b3c6458f4..5d1f2fbb6 100644 --- a/dbcon/execplan/calpontselectexecutionplan.h +++ b/dbcon/execplan/calpontselectexecutionplan.h @@ -575,6 +575,15 @@ public: return fHasOrderBy; } + void specHandlerProcessed(const bool hand) + { + fSpecHandlerProcessed = hand; + } + const bool specHandlerProcessed() const + { + return fSpecHandlerProcessed; + } + void selectSubList(const SelectList& selectSubList) { fSelectSubList = selectSubList; @@ -871,6 +880,9 @@ private: uint32_t fPriority; uint32_t fStringTableThreshold; + + // for specific handlers processing, e.g. GROUP BY + bool fSpecHandlerProcessed; // Derived table involved in the query. 
For derived table optimization std::vector fSubSelectList; diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 8c0615d83..04989e7b7 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -1846,12 +1846,13 @@ void makeVtableModeSteps(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, if (csep->limitNum() != (uint64_t) - 1) { // special case for outer query order by limit -- return all - if (jobInfo.subId == 0 && csep->hasOrderBy()) + if (jobInfo.subId == 0 && csep->hasOrderBy() && !csep->specHandlerProcessed()) { jobInfo.limitCount = (uint64_t) - 1; } - // support order by and limit in sub-query/union + // support order by and limit in sub-query/union or + // GROUP BY handler processed outer query order else if (csep->orderByCols().size() > 0) { addOrderByAndLimit(csep, jobInfo); diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index da0fdbced..0af74c433 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -10164,25 +10164,28 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } } - if (ord_cols.length() > 0) // has order by + if ( gwi.orderByCols.size() ) // has order by { gwi.thd->infinidb_vtable.has_order_by = true; csep->hasOrderBy(true); - ord_cols = " order by " + ord_cols; - select_query += ord_cols; + csep->specHandlerProcessed(true); } } // LIMIT and OFFSET are extracted from TABLE_LIST elements. // All of JOIN-ed tables contain relevant limit and offset. 
- if (gi.groupByTables->select_lex->select_limit) + uint64_t limit = (uint64_t)-1; + if (gi.groupByTables->select_lex->select_limit && + ( limit = static_cast(gi.groupByTables->select_lex->select_limit)->val_int() ) && + limit != (uint64_t)-1 ) { - csep->limitNum(((Item_int*)gi.groupByTables->select_lex->select_limit)->val_int()); + csep->limitNum(limit); } - else + else if (csep->hasOrderBy()) { - if (csep->hasOrderBy()) - csep->limitNum((uint64_t) - 2); + // We use LimitedOrderBy so set the limit to + // go through the check in addOrderByAndLimit + csep->limitNum((uint64_t) - 2); } if (gi.groupByTables->select_lex->offset_limit) From 5b682a522ec200c42323a55190adf3a759878c75 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Mon, 3 Sep 2018 16:28:36 +0300 Subject: [PATCH 118/123] MCOL-1510: Add CalpontSelectExecutionPlan::serialize() changes. --- dbcon/execplan/calpontselectexecutionplan.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbcon/execplan/calpontselectexecutionplan.cpp b/dbcon/execplan/calpontselectexecutionplan.cpp index f21bf618a..0e2417588 100644 --- a/dbcon/execplan/calpontselectexecutionplan.cpp +++ b/dbcon/execplan/calpontselectexecutionplan.cpp @@ -478,6 +478,7 @@ void CalpontSelectExecutionPlan::serialize(messageqcpp::ByteStream& b) const b << (uint64_t)fLimitStart; b << (uint64_t)fLimitNum; b << static_cast(fHasOrderBy); + b << static_cast(fSpecHandlerProcessed); b << static_cast(fSelectSubList.size()); @@ -645,6 +646,7 @@ void CalpontSelectExecutionPlan::unserialize(messageqcpp::ByteStream& b) b >> (uint64_t&)fLimitStart; b >> (uint64_t&)fLimitNum; b >> reinterpret_cast< ByteStream::byte&>(fHasOrderBy); + b >> reinterpret_cast< ByteStream::byte&>(fSpecHandlerProcessed); // for SELECT subquery b >> size; From 5821a710725f6dc1744fe59ae23ff861ef8fa44d Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Wed, 12 Sep 2018 14:08:35 +0300 Subject: [PATCH 119/123] MCOL-1717 GROUP BY handler now processes only relevant queries. 
--- dbcon/mysql/ha_calpont.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbcon/mysql/ha_calpont.cpp b/dbcon/mysql/ha_calpont.cpp index 8e906dd42..51ede22d5 100644 --- a/dbcon/mysql/ha_calpont.cpp +++ b/dbcon/mysql/ha_calpont.cpp @@ -1156,8 +1156,11 @@ create_calpont_group_by_handler(THD* thd, Query* query) { ha_calpont_group_by_handler* handler = NULL; + // Create a handler if there is an agregate or a GROUP BY + // and if vtable was explicitly disabled. if ( thd->infinidb_vtable.vtable_state == THD::INFINIDB_DISABLE_VTABLE - && thd->variables.infinidb_vtable_mode == 0) + && thd->variables.infinidb_vtable_mode == 0 + && ( query->group_by || thd->lex->select_lex.with_sum_func) ) { handler = new ha_calpont_group_by_handler(thd, query); From 1d0488df33d503f5356d72c997fcbe6f66f74a87 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Mon, 17 Sep 2018 16:15:10 +0300 Subject: [PATCH 120/123] MCOL-1601 GROUP BY supports subqueries in HAVING(derived tables processed by the server.) 
--- dbcon/mysql/ha_calpont_impl.cpp | 74 +++++++++++++++++++++++++++----- dbcon/mysql/ha_calpont_impl_if.h | 3 ++ dbcon/mysql/sm.cpp | 52 ++++++++++++++++------ dbcon/mysql/sm.h | 14 +++--- 4 files changed, 114 insertions(+), 29 deletions(-) diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 59a3df714..5a71d6d48 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -5045,6 +5045,7 @@ int ha_calpont_impl_external_lock(THD* thd, TABLE* table, int lock_type) { push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 9999, infinidb_autoswitch_warning.c_str()); } + ci->queryState = 0; } else // vtable mode { @@ -5212,10 +5213,13 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE ci->warningMsg = msg; } - // if the previous query has error, re-establish the connection + // If the previous query has error and + // this is not a subquery run by the server(MCOL-1601) + // re-establish the connection if (ci->queryState != 0) { - sm::sm_cleanup(ci->cal_conn_hndl); + if( ci->cal_conn_hndl_st.size() == 0 ) + sm::sm_cleanup(ci->cal_conn_hndl); ci->cal_conn_hndl = 0; } @@ -5237,6 +5241,7 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE hndl = ci->cal_conn_hndl; + ci->cal_conn_hndl_st.push(ci->cal_conn_hndl); if (!csep) csep.reset(new CalpontSelectExecutionPlan()); @@ -5439,11 +5444,15 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE idbassert(hndl != 0); hndl->csc = csc; + // The next section is useless if (thd->infinidb_vtable.vtable_state == THD::INFINIDB_DISABLE_VTABLE) ti.conn_hndl = hndl; else + { ci->cal_conn_hndl = hndl; - + ci->cal_conn_hndl_st.pop(); + ci->cal_conn_hndl_st.push(ci->cal_conn_hndl); + } try { hndl->connect(); @@ -5476,11 +5485,11 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE (thd->infinidb_vtable.vtable_state == THD::INFINIDB_DISABLE_VTABLE) || 
(thd->infinidb_vtable.vtable_state == THD::INFINIDB_REDO_QUERY)) { - if (ti.tpl_ctx == 0) - { - ti.tpl_ctx = new sm::cpsm_tplh_t(); - ti.tpl_scan_ctx = sm::sp_cpsm_tplsch_t(new sm::cpsm_tplsch_t()); - } + // MCOL-1601 Using stacks of ExeMgr conn hndls, table and scan contexts. + ti.tpl_ctx = new sm::cpsm_tplh_t(); + ti.tpl_ctx_st.push(ti.tpl_ctx); + ti.tpl_scan_ctx = sm::sp_cpsm_tplsch_t(new sm::cpsm_tplsch_t()); + ti.tpl_scan_ctx_st.push(ti.tpl_scan_ctx); // make sure rowgroup is null so the new meta data can be taken. This is for some case mysql // call rnd_init for a table more than once. @@ -5560,6 +5569,7 @@ error: if (ci->cal_conn_hndl) { + // end_query() should be called here. sm::sm_cleanup(ci->cal_conn_hndl); ci->cal_conn_hndl = 0; } @@ -5571,6 +5581,7 @@ internal_error: if (ci->cal_conn_hndl) { + // end_query() should be called here. sm::sm_cleanup(ci->cal_conn_hndl); ci->cal_conn_hndl = 0; } @@ -5802,6 +5813,12 @@ int ha_calpont_impl_group_by_end(ha_calpont_group_by_handler* group_hand, TABLE* ci->cal_conn_hndl = 0; // clear querystats because no query stats available for cancelled query ci->queryStats = ""; + if ( ci->cal_conn_hndl_st.size() ) + { + ci->cal_conn_hndl_st.pop(); + if ( ci->cal_conn_hndl_st.size() ) + ci->cal_conn_hndl = ci->cal_conn_hndl_st.top(); + } } return 0; @@ -5811,6 +5828,7 @@ int ha_calpont_impl_group_by_end(ha_calpont_group_by_handler* group_hand, TABLE* cal_table_info ti = ci->tableMap[table]; sm::cpsm_conhdl_t* hndl; + bool clearScanCtx = false; hndl = ci->cal_conn_hndl; @@ -5818,6 +5836,8 @@ int ha_calpont_impl_group_by_end(ha_calpont_group_by_handler* group_hand, TABLE* { if (ti.tpl_scan_ctx.get()) { + clearScanCtx = ( (ti.tpl_scan_ctx.get()->rowsreturned) && + ti.tpl_scan_ctx.get()->rowsreturned == ti.tpl_scan_ctx.get()->getRowCount() ); try { sm::tpl_scan_close(ti.tpl_scan_ctx); @@ -5829,11 +5849,31 @@ int ha_calpont_impl_group_by_end(ha_calpont_group_by_handler* group_hand, TABLE* } ti.tpl_scan_ctx.reset(); - + if ( 
ti.tpl_scan_ctx_st.size() ) + { + ti.tpl_scan_ctx_st.pop(); + if ( ti.tpl_scan_ctx_st.size() ) + ti.tpl_scan_ctx = ti.tpl_scan_ctx_st.top(); + } try { if(hndl) - sm::tpl_close(ti.tpl_ctx, &hndl, ci->stats); + { + sm::tpl_close(ti.tpl_ctx, &hndl, ci->stats, clearScanCtx); +// Normaly stats variables are set in external_lock method but we set it here +// since they we pretend we are in vtable_disabled mode and the stats vars won't be set. +// We sum the stats up here since server could run a number of +// queries e.g. each for a subquery in a filter. + if(hndl) + { + if (hndl->queryStats.length()) + ci->queryStats += hndl->queryStats; + if (hndl->extendedStats.length()) + ci->extendedStats += hndl->extendedStats; + if (hndl->miniStats.length()) + ci->miniStats += hndl->miniStats; + } + } ci->cal_conn_hndl = hndl; @@ -5866,6 +5906,20 @@ int ha_calpont_impl_group_by_end(ha_calpont_group_by_handler* group_hand, TABLE* ti.tpl_ctx = 0; + if ( ti.tpl_ctx_st.size() ) + { + ti.tpl_ctx_st.pop(); + if ( ti.tpl_ctx_st.size() ) + ti.tpl_ctx = ti.tpl_ctx_st.top(); + } + + if ( ci->cal_conn_hndl_st.size() ) + { + ci->cal_conn_hndl_st.pop(); + if ( ci->cal_conn_hndl_st.size() ) + ci->cal_conn_hndl = ci->cal_conn_hndl_st.top(); + } + ci->tableMap[table] = ti; // push warnings from CREATE phase diff --git a/dbcon/mysql/ha_calpont_impl_if.h b/dbcon/mysql/ha_calpont_impl_if.h index 4ebc7adb2..72579111b 100644 --- a/dbcon/mysql/ha_calpont_impl_if.h +++ b/dbcon/mysql/ha_calpont_impl_if.h @@ -187,7 +187,9 @@ struct cal_table_info { } ~cal_table_info() {} sm::cpsm_tplh_t* tpl_ctx; + std::stack tpl_ctx_st; sm::sp_cpsm_tplsch_t tpl_scan_ctx; + std::stack tpl_scan_ctx_st; unsigned c; // for debug purpose TABLE* msTablePtr; // no ownership sm::cpsm_conhdl_t* conn_hndl; @@ -273,6 +275,7 @@ struct cal_connection_info } sm::cpsm_conhdl_t* cal_conn_hndl; + std::stack cal_conn_hndl_st; int queryState; CalTableMap tableMap; sm::tableid_t currentTable; diff --git a/dbcon/mysql/sm.cpp 
b/dbcon/mysql/sm.cpp index 565a65ad0..1d8be5f05 100644 --- a/dbcon/mysql/sm.cpp +++ b/dbcon/mysql/sm.cpp @@ -280,7 +280,7 @@ tpl_open ( tableid_t tableid, cpsm_tplh_t* ntplh, cpsm_conhdl_t* conn_hdl) { - SMDEBUGLOG << "tpl_open: " << conn_hdl << " tableid: " << tableid << endl; + SMDEBUGLOG << "tpl_open: ntplh: " << ntplh << " conn_hdl: " << conn_hdl << " tableid: " << tableid << endl; // if first time enter this function for a statement, set // queryState to QUERY_IN_PRCOESS and get execution plan. @@ -319,7 +319,9 @@ tpl_scan_open ( tableid_t tableid, sp_cpsm_tplsch_t& ntplsch, cpsm_conhdl_t* conn_hdl ) { +#if IDB_SM_DEBUG SMDEBUGLOG << "tpl_scan_open: " << conn_hdl << " tableid: " << tableid << endl; +#endif // @bug 649. No initialization here. take passed in reference ntplsch->tableid = tableid; @@ -354,8 +356,8 @@ tpl_scan_close ( sp_cpsm_tplsch_t& ntplsch ) SMDEBUGLOG << "tpl_scan_close: "; if (ntplsch) - SMDEBUGLOG << " tableid: " << ntplsch->tableid << endl; - + SMDEBUGLOG << "tpl_scan_close: ntplsch " << ntplsch; + SMDEBUGLOG << "tpl_scan_close: tableid: " << ntplsch->tableid << endl; #endif ntplsch.reset(); @@ -365,11 +367,12 @@ tpl_scan_close ( sp_cpsm_tplsch_t& ntplsch ) status_t tpl_close ( cpsm_tplh_t* ntplh, cpsm_conhdl_t** conn_hdl, - QueryStats& stats ) + QueryStats& stats, + bool clear_scan_ctx) { cpsm_conhdl_t* hndl = *conn_hdl; #if IDB_SM_DEBUG - SMDEBUGLOG << "tpl_close: " << hndl; + SMDEBUGLOG << "tpl_close: hndl" << hndl << " ntplh " << ntplh; if (ntplh) SMDEBUGLOG << " tableid: " << ntplh->tableid; @@ -384,9 +387,21 @@ tpl_close ( cpsm_tplh_t* ntplh, // Get the query stats ByteStream bs; ByteStream::quadbyte qb = 3; + //string tmpQueryStats; + //string tmpExtendedStats; + //string tmpMiniStats; bs << qb; hndl->write(bs); + + // MCOL-1601 Dispose of unused empty RowGroup + if (clear_scan_ctx) + { + bs = hndl->exeMgr->read(); + } +#if IDB_SM_DEBUG + SMDEBUGLOG << "tpl_close hndl->exeMgr: " << hndl->exeMgr << endl; +#endif //keep reading until 
we get a string //TODO: really need to fix this! Why is ExeMgr sending other stuff? for (int tries = 0; tries < 10; tries++) @@ -397,9 +412,20 @@ tpl_close ( cpsm_tplh_t* ntplh, try { - bs >> hndl->queryStats; - bs >> hndl->extendedStats; - bs >> hndl->miniStats; + // MCOL-1601 Server could run a number of subqueries separetely. + // If so there will be a number of statistics returned. + /*if(hndl->queryStats.size()) + { + bs >> tmpQueryStats, hndl->queryStats += tmpQueryStats; + bs >> tmpExtendedStats, hndl->extendedStats += tmpExtendedStats; + bs >> hndl->miniStats, hndl->miniStats += tmpMiniStats; + } + else*/ + { + bs >> hndl->queryStats; + bs >> hndl->extendedStats; + bs >> hndl->miniStats; + } stats.unserialize(bs); stats.setEndTime(); stats.insert(); @@ -415,6 +441,9 @@ tpl_close ( cpsm_tplh_t* ntplh, { // querystats messed up. close connection. // no need to throw for querystats protocol error, like for tablemode. +#if IDB_SM_DEBUG + SMDEBUGLOG << "tpl_close() exception whilst getting stats" << endl; +#endif end_query(hndl); sm_cleanup(hndl); *conn_hdl = 0; @@ -436,9 +465,9 @@ sm_init ( uint32_t sid, { // clear file content #if IDB_SM_DEBUG - smlog.close(); - smlog.open("/tmp/sm.log"); - SMDEBUGLOG << "sm_init: " << dboptions << endl; + //smlog.close(); + //smlog.open("/tmp/sm.log"); + SMDEBUGLOG << "sm_init: " << endl; #endif // @bug5660 Connection changes related to the local pm setting @@ -474,7 +503,6 @@ sm_cleanup ( cpsm_conhdl_t* conn_hdl ) { #if IDB_SM_DEBUG SMDEBUGLOG << "sm_cleanup: " << conn_hdl << endl; - SMDEBUGLOG.close(); #endif delete conn_hdl; diff --git a/dbcon/mysql/sm.h b/dbcon/mysql/sm.h index a2c8defaa..65cf35123 100644 --- a/dbcon/mysql/sm.h +++ b/dbcon/mysql/sm.h @@ -60,12 +60,12 @@ const int SQL_NOT_FOUND = -1000; const int SQL_KILLED = -1001; const int CALPONT_INTERNAL_ERROR = -1007; -#if IDB_SM_DEBUG -extern std::ofstream smlog; -#define SMDEBUGLOG smlog -#else -#define SMDEBUGLOG if (false) std::cerr -#endif +//#if IDB_SM_DEBUG 
+//extern std::ofstream smlog; +//#define SMDEBUGLOG smlog +//#else +#define SMDEBUGLOG if (true) std::cerr +//#endif extern const std::string DEFAULT_SAVE_PATH; typedef uint64_t tableid_t; @@ -282,7 +282,7 @@ extern status_t tpl_open(tableid_t, cpsm_tplh_t*, cpsm_conhdl_t*); extern status_t tpl_scan_open(tableid_t, sp_cpsm_tplsch_t&, cpsm_conhdl_t*); extern status_t tpl_scan_fetch(sp_cpsm_tplsch_t&, cpsm_conhdl_t*, int* k = 0); extern status_t tpl_scan_close(sp_cpsm_tplsch_t&); -extern status_t tpl_close(cpsm_tplh_t*, cpsm_conhdl_t**, querystats::QueryStats& stats); +extern status_t tpl_close(cpsm_tplh_t*, cpsm_conhdl_t**, querystats::QueryStats& stats, bool clear_scan_ctx = false); } From aa11707585624d70a66659b2d7a93cd34a5792ff Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Mon, 17 Sep 2018 16:17:11 +0300 Subject: [PATCH 121/123] MCOL-1052 ExeMgr now logs original queries processed by GROUP BY handler. --- dbcon/mysql/ha_calpont_impl.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 5a71d6d48..12eadec42 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -5313,8 +5313,12 @@ int ha_calpont_impl_group_by_init(ha_calpont_group_by_handler* group_hand, TABLE return 0; string query; - query.assign(thd->infinidb_vtable.original_query.ptr(), - thd->infinidb_vtable.original_query.length()); + // Set the query text only once if the server executes + // subqueries separately. + if(ci->queryState) + query.assign(""); + else + query.assign(thd->query_string.str(), thd->query_string.length()); csep->data(query); try From 18143ecaeee8340a70f69a3a22e44ea3708bfe57 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Mon, 17 Sep 2018 16:18:54 +0300 Subject: [PATCH 122/123] MCOL-1052 Remove unused symbols. 
--- dbcon/mysql/ha_calpont_execplan.cpp | 1 - dbcon/mysql/ha_calpont_impl.cpp | 98 ----------------------------- 2 files changed, 99 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 0af74c433..789d85ad3 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -9704,7 +9704,6 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro { gwi.fatalParseError = false; execplan::CalpontSelectExecutionPlan::ReturnedColumnList::iterator iter = gwi.returnedCols.begin(); - AggregateColumn* ac = NULL; for ( ; iter != gwi.returnedCols.end(); iter++ ) { diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 12eadec42..ad83f48db 100644 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -276,104 +276,6 @@ void storeNumericField(Field** f, int64_t value, CalpontSystemCatalog::ColType& } } -void storeNumericFieldGroupBy(Field** f, int64_t value, CalpontSystemCatalog::ColType& ct) -{ - // unset null bit first - if ((*f)->null_ptr) - *(*f)->null_ptr &= ~(*f)->null_bit; - - // For unsigned, use the ColType returned in the row rather than the - // unsigned_flag set by mysql. This is because mysql gets it wrong for SUM() - // Hopefully, in all other cases we get it right. - switch ((*f)->type()) - { - case MYSQL_TYPE_NEWDECIMAL: - { - Field_new_decimal* f2 = (Field_new_decimal*)*f; - - // @bug4388 stick to InfiniDB's scale in case mysql gives wrong scale due - // to create vtable limitation. 
- if (f2->dec < ct.scale) - f2->dec = ct.scale; - - char buf[256]; - dataconvert::DataConvert::decimalToString(value, (unsigned)ct.scale, buf, 256, ct.colDataType); - f2->store(buf, strlen(buf), f2->charset()); - break; - } - - case MYSQL_TYPE_TINY: //TINYINT type - { - Field_tiny* f2 = (Field_tiny*)*f; - longlong int_val = (longlong)value; - f2->store(int_val, f2->unsigned_flag); - break; - } - - case MYSQL_TYPE_SHORT: //SMALLINT type - { - Field_short* f2 = (Field_short*)*f; - longlong int_val = (longlong)value; - f2->store(int_val, f2->unsigned_flag); - break; - } - - case MYSQL_TYPE_LONG: //INT type - { - Field_long* f2 = (Field_long*)*f; - longlong int_val = (longlong)value; - f2->store(int_val, f2->unsigned_flag); - break; - } - - case MYSQL_TYPE_LONGLONG: //BIGINT type - { - Field_longlong* f2 = (Field_longlong*)*f; - longlong int_val = (longlong)value; - f2->store(int_val, f2->unsigned_flag); - break; - } - - case MYSQL_TYPE_FLOAT: // FLOAT type - { - Field_float* f2 = (Field_float*)*f; - float float_val = *(float*)(&value); - f2->store(float_val); - break; - } - - case MYSQL_TYPE_DOUBLE: // DOUBLE type - { - Field_double* f2 = (Field_double*)*f; - double double_val = *(double*)(&value); - f2->store(double_val); - break; - } - - case MYSQL_TYPE_VARCHAR: - { - Field_varstring* f2 = (Field_varstring*)*f; - char tmp[25]; - - if (ct.colDataType == CalpontSystemCatalog::DECIMAL) - dataconvert::DataConvert::decimalToString(value, (unsigned)ct.scale, tmp, 25, ct.colDataType); - else - snprintf(tmp, 25, "%ld", value); - - f2->store(tmp, strlen(tmp), f2->charset()); - break; - } - - default: - { - Field_longlong* f2 = (Field_longlong*)*f; - longlong int_val = (longlong)value; - f2->store(int_val, f2->unsigned_flag); - break; - } - } -} - // // @bug 2244. Log exception related to lost connection to ExeMgr. 
// Log exception error from calls to sm::tpl_scan_fetch in fetchNextRow() From 5cab6c4c70bfdb820d173dbd566d57232648e8fd Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Tue, 18 Sep 2018 11:23:38 +0300 Subject: [PATCH 123/123] MCOL-1601 Removed unused symbols. --- dbcon/mysql/sm.cpp | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/dbcon/mysql/sm.cpp b/dbcon/mysql/sm.cpp index 1d8be5f05..9cbfc73e6 100644 --- a/dbcon/mysql/sm.cpp +++ b/dbcon/mysql/sm.cpp @@ -387,9 +387,6 @@ tpl_close ( cpsm_tplh_t* ntplh, // Get the query stats ByteStream bs; ByteStream::quadbyte qb = 3; - //string tmpQueryStats; - //string tmpExtendedStats; - //string tmpMiniStats; bs << qb; hndl->write(bs); @@ -412,20 +409,9 @@ tpl_close ( cpsm_tplh_t* ntplh, try { - // MCOL-1601 Server could run a number of subqueries separetely. - // If so there will be a number of statistics returned. - /*if(hndl->queryStats.size()) - { - bs >> tmpQueryStats, hndl->queryStats += tmpQueryStats; - bs >> tmpExtendedStats, hndl->extendedStats += tmpExtendedStats; - bs >> hndl->miniStats, hndl->miniStats += tmpMiniStats; - } - else*/ - { - bs >> hndl->queryStats; - bs >> hndl->extendedStats; - bs >> hndl->miniStats; - } + bs >> hndl->queryStats; + bs >> hndl->extendedStats; + bs >> hndl->miniStats; stats.unserialize(bs); stats.setEndTime(); stats.insert();