diff --git a/CMakeLists.txt b/CMakeLists.txt index adee980ba..c64f06969 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,7 +158,7 @@ SET (ENGINE_TOOLSDIR "${INSTALL_ENGINE}/tools") SET (ENGINE_COMMON_LIBS messageqcpp loggingcpp configcpp idbboot ${Boost_LIBRARIES} xml2 pthread rt libmysql_client) SET (ENGINE_OAM_LIBS oamcpp alarmmanager) SET (ENGINE_BRM_LIBS brm idbdatafile cacheutils rwlock ${ENGINE_OAM_LIBS} ${ENGINE_COMMON_LIBS}) -SET (ENGINE_EXEC_LIBS joblist execplan windowfunction joiner rowgroup funcexp udfsdk dataconvert common compress querystats querytele thrift threadpool ${ENGINE_BRM_LIBS}) +SET (ENGINE_EXEC_LIBS joblist execplan windowfunction joiner rowgroup funcexp udfsdk regr dataconvert common compress querystats querytele thrift threadpool ${ENGINE_BRM_LIBS}) SET (ENGINE_WRITE_LIBS ddlpackageproc ddlpackage dmlpackageproc dmlpackage writeengine writeengineclient idbdatafile cacheutils ${ENGINE_EXEC_LIBS}) SET (ENGINE_COMMON_LDFLAGS "") diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index 179e5c14b..bb65715da 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -75,9 +75,10 @@ ident_cont [A-Za-z\200-\377_0-9\$] identifier {ident_start}{ident_cont}* extended_identifier {ident_start}{extended_ident_cont}* /* fully qualified names regexes */ -fq_identifier {identifier}\.{identifier} +ident_w_spaces {identifier}\x20* identifier_quoted {grave_accent}{extended_identifier}{grave_accent} identifier_double_quoted {double_quote}{extended_identifier}{double_quote} +column_ident_quoted {grave_accent}{ident_w_spaces}+{grave_accent} integer [-+]?{digit}+ decimal ([-+]?({digit}*\.{digit}+)|({digit}+\.{digit}*)) @@ -184,9 +185,13 @@ TINYTEXT {return TINYTEXT;} TEXT {return TEXT;} MEDIUMTEXT {return MEDIUMTEXT;} LONGTEXT {return LONGTEXT;} +BOOL {return BOOL;} +BOOLEAN {return BOOLEAN;} \n { lineno++;} +{column_ident_quoted} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner, STRIP_QUOTES); return 
IDENT;} + {whitespace} { /* ignore */ } diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index 37ab49425..2b35c8392 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -112,6 +112,7 @@ MIN_ROWS MODIFY NO NOT NULL_TOK NUMBER NUMERIC ON PARTIAL PRECISION PRIMARY REFERENCES RENAME RESTRICT SET SMALLINT TABLE TEXT TINYBLOB TINYTEXT TINYINT TO UNIQUE UNSIGNED UPDATE USER SESSION_USER SYSTEM_USER VARCHAR VARBINARY VARYING WITH ZONE DOUBLE IDB_FLOAT REAL CHARSET IDB_IF EXISTS CHANGE TRUNCATE +BOOL BOOLEAN %token DQ_IDENT IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE TIME @@ -1041,6 +1042,18 @@ exact_numeric_type: $$ = new ColumnType(DDL_UNSIGNED_BIGINT); $$->fLength = DDLDatatypeLength[DDL_BIGINT]; } + | BOOLEAN + { + $$ = new ColumnType(DDL_TINYINT); + $$->fLength = DDLDatatypeLength[DDL_TINYINT]; + $$->fPrecision = 1; + } + | BOOL + { + $$ = new ColumnType(DDL_TINYINT); + $$->fLength = DDLDatatypeLength[DDL_TINYINT]; + $$->fPrecision = 1; + } ; /* Bug 1570, change default scale to 0 from -1 */ opt_precision_scale: diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index da91919f0..da4f73823 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp
@@ -76,21 +76,55 @@ namespace
 
 struct cmpTuple
 {
-    bool operator()(boost::tuple a,
-                    boost::tuple b)
-    {
-        if (boost::get<0>(a) < boost::get<0>(b))
-            return true;
-
-        if (boost::get<0>(a) == boost::get<0>(b))
-        {
-            if (boost::get<1>(a) < boost::get<1>(b))
-                return true;
-
-            if (boost::get<1>(a) == boost::get<1>(b))
-                return boost::get<2>(a) < boost::get<2>(b);
-        }
-
+    typedef std::vector<uint32_t> ParamKeys;
+    typedef boost::tuple<uint32_t, int, mcsv1sdk::mcsv1_UDAF*, ParamKeys*> Key;
+
+    // Comparator for AGG_MAP keys: a strict weak ordering over (column key,
+    // agg op, UDAF function, UDAF parameter keys). Each field decides the
+    // result as soon as it differs. Falling through to the next field when one
+    // side is merely "greater" would let comp(a, b) and comp(b, a) both be
+    // true, which breaks std::map lookups.
+    bool operator()(const Key& a, const Key& b) const
+    {
+        const uint32_t keya = boost::get<0>(a);
+        const uint32_t keyb = boost::get<0>(b);
+
+        if (keya != keyb)
+            return keya < keyb;
+
+        const int opa = boost::get<1>(a);
+        const int opb = boost::get<1>(b);
+
+        if (opa != opb)
+            return opa < opb;
+
+        mcsv1sdk::mcsv1_UDAF* pUDAFa = boost::get<2>(a);
+        mcsv1sdk::mcsv1_UDAF* pUDAFb = boost::get<2>(b);
+
+        if (pUDAFa != pUDAFb)
+            return pUDAFa < pUDAFb;
+
+        // Not a UDAF: key and op identify the function, parameter keys are unused.
+        if (pUDAFa == NULL)
+            return false;
+
+        const ParamKeys* paramKeysa = boost::get<3>(a);
+        const ParamKeys* paramKeysb = boost::get<3>(b);
+
+        // Test for NULL before dereferencing; a missing list sorts first.
+        if (paramKeysa == NULL || paramKeysb == NULL)
+            return paramKeysa == NULL && paramKeysb != NULL;
+
+        // Shorter lists sort first, equal-length lists compare element-wise.
+        if (paramKeysa->size() != paramKeysb->size())
+            return paramKeysa->size() < paramKeysb->size();
+
+        for (uint64_t i = 0; i < paramKeysa->size(); ++i)
+        {
+            if ((*paramKeysa)[i] != (*paramKeysb)[i])
+                return (*paramKeysa)[i] < (*paramKeysb)[i];
+        }
+
         return false;
     }
 };
@@ -101,7 +135,7 @@ typedef vector RowBucketVec; // The AGG_MAP type is used to maintain a list of aggregate functions in order to // detect duplicates. Since all UDAF have the same op type (ROWAGG_UDAF), we add in // the function pointer in order to ensure uniqueness. -typedef map, uint64_t, cmpTuple> AGG_MAP; +typedef map* >, uint64_t, cmpTuple> AGG_MAP; inline RowAggFunctionType functionIdMap(int planFuncId) { @@ -796,7 +830,6 @@ const string TupleAggregateStep::toString() const return oss.str(); } - SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) { SJSTEP spjs; @@ -849,10 +882,9 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) idbassert(cc != NULL); // @bug5261 bool isNull = (ConstantColumn::NULLDATA == cc->type()); - if (ac->aggOp() == ROWAGG_UDAF) + if (ac->aggOp() == AggregateColumn::UDAF) { UDAFColumn* udafc = dynamic_cast(ac); - if (udafc) { constAggDataVec.push_back( @@ -1099,7 +1131,6 @@ void TupleAggregateStep::prep1PhaseAggregate( uint32_t bigUintWidth = sizeof(uint64_t); // For UDAF uint32_t projColsUDAFIdx = 0; - uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function @@ -1296,21 +1327,28 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin()
+ projColsUDAFIdx; - for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. + if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < returnedColVec.size() && returnedColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(returnedColVec[k].first); + } + } // Create a RowAggFunctionCol (UDAF subtype) with the context. funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, outIdx)); break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -1489,44 +1527,11 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(udafFuncCol->fUDAFContext.getPrecision()); typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); - // If the first param is const - udafcParamIdx = 0; - ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - - if (cc) - { - funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; - } - - ++udafcParamIdx; break; } case ROWAGG_MULTI_PARM: { - oidsAgg.push_back(oidsProj[colProj]); - keysAgg.push_back(key); - scaleAgg.push_back(scaleProj[colProj]); - precisionAgg.push_back(precisionProj[colProj]); - typeAgg.push_back(typeProj[colProj]); - widthAgg.push_back(width[colProj]); - - // If the param is const - if (udafc) - { - ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - - if (cc) - { - funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; - } - } - else - { - throw QueryDataExcept("prep1PhaseAggregate: UDAF multi function with no parms", aggregateFuncErr); - } - - ++udafcParamIdx; } break; @@ -1540,7 +1545,7 @@ void TupleAggregateStep::prep1PhaseAggregate( } // 
find if this func is a duplicate - AGG_MAP::iterator iter = aggFuncMap.find(boost::make_tuple(key, aggOp, pUDAFFunc)); + AGG_MAP::iterator iter = aggFuncMap.find(boost::make_tuple(key, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (iter != aggFuncMap.end()) { @@ -1557,7 +1562,7 @@ void TupleAggregateStep::prep1PhaseAggregate( } else { - aggFuncMap.insert(make_pair(boost::make_tuple(key, aggOp, pUDAFFunc), funct->fOutputColumnIndex)); + aggFuncMap.insert(make_pair(boost::make_tuple(key, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), funct->fOutputColumnIndex)); } if (aggOp != ROWAGG_MULTI_PARM) @@ -1778,7 +1783,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(widthProj[colProj]); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[colAgg], 0, pUDAFFunc), colAgg)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[colAgg], 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAgg)); colAgg++; } @@ -1819,7 +1824,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(widthProj[colProj]); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[colAgg], 0, pUDAFFunc), colAgg)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[colAgg], 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAgg)); colAgg++; } @@ -1849,7 +1854,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( aggOp, stats, colAgg, colAgg, -1)); functionVec1.push_back(funct); - aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc), colAgg)); + aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL), colAgg)); colAgg++; continue; @@ -1896,12 +1901,21 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. + if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < aggColVec.size() && aggColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(aggColVec[k].first); + } + } // Create a RowAggFunctionCol (UDAF subtype) with the context. funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAgg)); break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -1913,11 +1927,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // skip if this is a duplicate - if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc)) != aggFuncMap.end()) + if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)) != aggFuncMap.end()) continue; functionVec1.push_back(funct); - aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc), colAgg)); + aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL), colAgg)); switch (aggOp) { @@ -2121,12 +2135,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // If the first param is const udafcParamIdx = 0; ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; } - ++udafcParamIdx; break; } @@ -2141,12 +2153,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( widthAgg.push_back(widthProj[colProj]); multiParmIndexes.push_back(colAgg); ++colAgg; - // If the param is const if (udafc) { + if (udafcParamIdx > udafc->aggParms().size() - 1) + { + throw QueryDataExcept("prep1PhaseDistinctAggregate: UDAF multi function with too many parms", aggregateFuncErr); + } ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; @@ -2156,7 +2170,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( { throw QueryDataExcept("prep1PhaseDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); } - ++udafcParamIdx; } break; @@ -2206,12 +2219,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( { SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(i, -1)); groupByNoDist.push_back(groupby); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL), i)); } - + // locate the return column position in aggregated rowgroup uint64_t outIdx = 0; - for (uint64_t i = 0; i < returnedColVec.size(); i++) { udafc = NULL; @@ -2231,7 +2243,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { - AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, 0, pUDAFFunc)); + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { @@ -2256,19 +2268,26 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; - for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. + if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < returnedColVec.size() && returnedColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(returnedColVec[k].first); + } + } break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -2366,7 +2385,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( case ROWAGG_BIT_XOR: default: { - AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { @@ -2397,7 +2416,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // check if a SUM or COUNT covered by AVG if (aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) { - it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc)); + it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { @@ -2565,7 +2584,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( { // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, outIdx)); @@ -2583,7 +2601,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( functionVec2.push_back(funct); // find if this func is a duplicate - AGG_MAP::iterator iter = aggDupFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + AGG_MAP::iterator iter = aggDupFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (iter != aggDupFuncMap.end()) { @@ -2600,7 +2618,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } else { - aggDupFuncMap.insert(make_pair(boost::make_tuple(retKey, aggOp, pUDAFFunc), + aggDupFuncMap.insert(make_pair(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL), funct->fOutputColumnIndex)); } @@ -2609,7 +2627,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } - ++outIdx; } // for (i @@ -2860,7 +2877,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( ++multiParms; continue; } - if (returnedColVec[k].first != distinctColKey) continue; @@ -2881,7 +2897,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex - multiParms)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -2909,7 +2925,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( ++multiParms; continue; } - // search non-distinct functions in functionVec vector::iterator it = functionVec2.begin(); @@ -2925,7 +2940,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex - multiParms)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if ((f->fOutputColumnIndex == k) && @@ -2947,7 +2962,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex - multiParms)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -3100,7 +3115,7 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc), colAggPm)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL), colAggPm)); colAggPm++; } @@ -3141,7 +3156,7 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc), colAggPm)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAggPm)); colAggPm++; } @@ -3183,21 +3198,28 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; - for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. + if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < aggColVec.size() && aggColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(aggColVec[k].first); + } + } // Create a RowAggFunctionCol (UDAF subtype) with the context. funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -3209,11 +3231,11 @@ void TupleAggregateStep::prep2PhasesAggregate( } // skip if this is a duplicate - if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc)) != aggFuncMap.end()) + if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)) != aggFuncMap.end()) continue; functionVecPm.push_back(funct); - aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc), colAggPm)); + aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL), colAggPm)); switch (aggOp) { @@ -3420,12 +3442,10 @@ void TupleAggregateStep::prep2PhasesAggregate( // If the first param is const udafcParamIdx = 0; ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; } - ++udafcParamIdx; break; } @@ -3439,12 +3459,14 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); colAggPm++; - // If the param is const if (udafc) { + if (udafcParamIdx > udafc->aggParms().size() - 1) + { + throw QueryDataExcept("prep2PhasesAggregate: UDAF multi function with too many parms", aggregateFuncErr); + } ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; @@ -3454,7 +3476,6 @@ void TupleAggregateStep::prep2PhasesAggregate( { throw QueryDataExcept("prep2PhasesAggregate: UDAF multi function with no parms", aggregateFuncErr); } - ++udafcParamIdx; } break; @@ -3482,7 +3503,6 @@ void TupleAggregateStep::prep2PhasesAggregate( AGG_MAP aggDupFuncMap; projColsUDAFIdx = 0; - // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3494,7 +3514,6 @@ void TupleAggregateStep::prep2PhasesAggregate( // locate the return column position in aggregated rowgroup from PM // outIdx is i without the multi-columns, uint64_t outIdx = 0; - for (uint64_t i = 0; i < returnedColVec.size(); i++) { uint32_t retKey = returnedColVec[i].first; @@ -3511,7 +3530,6 @@ void TupleAggregateStep::prep2PhasesAggregate( // Is this a UDAF? use the function as part of the key. 
pUDAFFunc = NULL; udafc = NULL; - if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; @@ -3520,21 +3538,29 @@ void TupleAggregateStep::prep2PhasesAggregate( { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. + if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < returnedColVec.size() && returnedColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(returnedColVec[k].first); + } + } break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); } } - AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { @@ -3555,7 +3581,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // check if a SUM or COUNT covered by AVG if (aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) { - it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc)); + it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { @@ -3680,7 +3706,6 @@ void TupleAggregateStep::prep2PhasesAggregate( { // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, outIdx)); @@ -3698,7 +3723,7 @@ void TupleAggregateStep::prep2PhasesAggregate( functionVecUm.push_back(funct); // find if this func is a duplicate - AGG_MAP::iterator iter = aggDupFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + AGG_MAP::iterator iter = aggDupFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (iter != aggDupFuncMap.end()) { @@ -3715,14 +3740,13 @@ void TupleAggregateStep::prep2PhasesAggregate( } else { - aggDupFuncMap.insert(make_pair(boost::make_tuple(retKey, aggOp, pUDAFFunc), + aggDupFuncMap.insert(make_pair(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), funct->fOutputColumnIndex)); } if (returnedColVec[i].second == AggregateColumn::AVG) avgFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } - ++outIdx; } @@ -3943,6 +3967,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // column index for PM aggregate rowgroup uint64_t colAggPm = 0; + uint64_t multiParm = 0; // for groupby column for (uint64_t i = 0; i < jobInfo.groupByColVec.size(); i++) @@ -3977,7 +4002,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc), colAggPm)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL), colAggPm)); colAggPm++; } @@ -4018,7 +4043,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc), colAggPm)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAggPm)); colAggPm++; } @@ -4067,21 +4092,28 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; - for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. + if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < aggColVec.size() && aggColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(aggColVec[k].first); + } + } // Create a RowAggFunctionCol (UDAF subtype) with the context. funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -4093,11 +4125,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // skip if this is a duplicate - if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc)) != aggFuncMap.end()) + if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL)) != aggFuncMap.end()) continue; functionVecPm.push_back(funct); - aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc), colAggPm)); + aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAggPm-multiParm)); switch (aggOp) { @@ -4300,12 +4332,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // If the first param is const udafcParamIdx = 0; ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; } - ++udafcParamIdx; break; } @@ -4319,13 +4349,16 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); multiParmIndexes.push_back(colAggPm); - colAggPm++; - + ++colAggPm; + ++multiParm; // If the param is const if (udafc) { + if (udafcParamIdx > udafc->aggParms().size() - 1) + { + throw QueryDataExcept("prep2PhasesDistinctAggregate: UDAF multi function with too many parms", aggregateFuncErr); + } ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; @@ -4335,7 +4368,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { throw QueryDataExcept("prep2PhasesDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); } - ++udafcParamIdx; } break; @@ -4378,17 +4410,15 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); - if (!udafFuncCol) { - throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } - funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, 
udafFuncCol->fOutputColumnIndex, - udafFuncCol->fOutputColumnIndex - multiParms, - udafFuncCol->fAuxColumnIndex - multiParms)); + udafFuncCol->fOutputColumnIndex-multiParms, + udafFuncCol->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); } @@ -4398,8 +4428,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fAggFunction, funcPm->fStatsFunction, funcPm->fOutputColumnIndex, - funcPm->fOutputColumnIndex - multiParms, - funcPm->fAuxColumnIndex - multiParms)); + funcPm->fOutputColumnIndex-multiParms, + funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); pUDAFFunc = NULL; } @@ -4412,7 +4442,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { continue; } - oidsAggUm.push_back(oidsAggPm[idx]); keysAggUm.push_back(keysAggPm[idx]); scaleAggUm.push_back(scaleAggPm[idx]); @@ -4449,7 +4478,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // locate the return column position in aggregated rowgroup from PM // outIdx is i without the multi-columns, uint64_t outIdx = 0; - for (uint64_t i = 0; i < returnedColVec.size(); i++) { pUDAFFunc = NULL; @@ -4470,19 +4498,26 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; - for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. 
+ if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < returnedColVec.size() && returnedColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(returnedColVec[k].first); + } + } break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -4492,245 +4527,225 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { - AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, 0, pUDAFFunc)); + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { colUm = it->second; } - else - { - ostringstream emsg; - emsg << "'" << jobInfo.keyInfo->tupleKeyToName[retKey] << "' isn't in tuple."; - cerr << "prep2PhasesDistinctAggregate: distinct " << emsg.str() - << " oid=" << (int) jobInfo.keyInfo->tupleKeyVec[retKey].fId - << ", alias=" << jobInfo.keyInfo->tupleKeyVec[retKey].fTable; - - if (jobInfo.keyInfo->tupleKeyVec[retKey].fView.length() > 0) - cerr << ", view=" << jobInfo.keyInfo->tupleKeyVec[retKey].fView; - - cerr << endl; - throw QueryDataExcept(emsg.str(), aggregateFuncErr); - } } - switch (aggOp) + if (colUm > -1) // Means we found a DISTINCT and have a column number { - case ROWAGG_DISTINCT_AVG: - - //avgFuncMap.insert(make_pair(key, funct)); - case ROWAGG_DISTINCT_SUM: + switch (aggOp) { - if (typeAggUm[colUm] == CalpontSystemCatalog::CHAR || - typeAggUm[colUm] == CalpontSystemCatalog::VARCHAR || - typeAggUm[colUm] == CalpontSystemCatalog::BLOB || - typeAggUm[colUm] == CalpontSystemCatalog::TEXT || - typeAggUm[colUm] == CalpontSystemCatalog::DATE || - typeAggUm[colUm] == CalpontSystemCatalog::DATETIME || - typeAggUm[colUm] == 
CalpontSystemCatalog::TIME) + case ROWAGG_DISTINCT_AVG: + + //avgFuncMap.insert(make_pair(key, funct)); + case ROWAGG_DISTINCT_SUM: { - Message::Args args; - args.add("sum/average"); - args.add(colTypeIdString(typeAggUm[colUm])); - string emsg = IDBErrorInfo::instance()-> - errorMsg(ERR_AGGREGATE_TYPE_NOT_SUPPORT, args); - cerr << "prep2PhasesDistinctAggregate: " << emsg << endl; - throw IDBExcept(emsg, ERR_AGGREGATE_TYPE_NOT_SUPPORT); + if (typeAggUm[colUm] == CalpontSystemCatalog::CHAR || + typeAggUm[colUm] == CalpontSystemCatalog::VARCHAR || + typeAggUm[colUm] == CalpontSystemCatalog::BLOB || + typeAggUm[colUm] == CalpontSystemCatalog::TEXT || + typeAggUm[colUm] == CalpontSystemCatalog::DATE || + typeAggUm[colUm] == CalpontSystemCatalog::DATETIME || + typeAggUm[colUm] == CalpontSystemCatalog::TIME) + { + Message::Args args; + args.add("sum/average"); + args.add(colTypeIdString(typeAggUm[colUm])); + string emsg = IDBErrorInfo::instance()-> + errorMsg(ERR_AGGREGATE_TYPE_NOT_SUPPORT, args); + cerr << "prep2PhasesDistinctAggregate: " << emsg << endl; + throw IDBExcept(emsg, ERR_AGGREGATE_TYPE_NOT_SUPPORT); + } + + oidsAggDist.push_back(oidsAggUm[colUm]); + keysAggDist.push_back(retKey); + + if (typeAggUm[colUm] != CalpontSystemCatalog::DOUBLE && + typeAggUm[colUm] != CalpontSystemCatalog::FLOAT) + { + if (isUnsigned(typeAggUm[colUm])) + { + typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + precisionAggDist.push_back(20); + } + else + { + typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + precisionAggDist.push_back(19); + } + + uint32_t scale = scaleAggUm[colUm]; + + // for int average, FE expects a decimal + if (aggOp == ROWAGG_DISTINCT_AVG) + scale = jobInfo.scaleOfAvg[retKey]; // scale += 4; + + scaleAggDist.push_back(scale); + widthAggDist.push_back(bigIntWidth); + } + else + { + typeAggDist.push_back(typeAggUm[colUm]); + scaleAggDist.push_back(scaleAggUm[colUm]); + precisionAggDist.push_back(precisionAggUm[colUm]); + 
widthAggDist.push_back(widthAggUm[colUm]); + } } + // PM: put the count column for avg next to the sum + // let fall through to add a count column for average function + //if (aggOp != ROWAGG_DISTINCT_AVG) + break; - oidsAggDist.push_back(oidsAggUm[colUm]); - keysAggDist.push_back(retKey); - - if (typeAggUm[colUm] != CalpontSystemCatalog::DOUBLE && - typeAggUm[colUm] != CalpontSystemCatalog::FLOAT) + case ROWAGG_COUNT_DISTINCT_COL_NAME: { + oidsAggDist.push_back(oidsAggUm[colUm]); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(0); + // work around count() in select subquery + precisionAggDist.push_back(9999); + if (isUnsigned(typeAggUm[colUm])) { typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); - precisionAggDist.push_back(20); } else { typeAggDist.push_back(CalpontSystemCatalog::BIGINT); - precisionAggDist.push_back(19); } - uint32_t scale = scaleAggUm[colUm]; - - // for int average, FE expects a decimal - if (aggOp == ROWAGG_DISTINCT_AVG) - scale = jobInfo.scaleOfAvg[retKey]; // scale += 4; - - scaleAggDist.push_back(scale); widthAggDist.push_back(bigIntWidth); } - else - { - typeAggDist.push_back(typeAggUm[colUm]); - scaleAggDist.push_back(scaleAggUm[colUm]); - precisionAggDist.push_back(precisionAggUm[colUm]); - widthAggDist.push_back(widthAggUm[colUm]); - } - } - // PM: put the count column for avg next to the sum - // let fall through to add a count column for average function - //if (aggOp != ROWAGG_DISTINCT_AVG) - break; + break; - case ROWAGG_COUNT_DISTINCT_COL_NAME: + default: + // cound happen if agg and agg distinct use same column. + colUm = -1; + break; + } // switch + } + // For non distinct aggregates + if (colUm == -1) + { + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL)); + + if (it != aggFuncMap.end()) { + colUm = it->second; oidsAggDist.push_back(oidsAggUm[colUm]); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(0); - // work around count() in select subquery - precisionAggDist.push_back(9999); - - if (isUnsigned(typeAggUm[colUm])) - { - typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); - } - else - { - typeAggDist.push_back(CalpontSystemCatalog::BIGINT); - } - - widthAggDist.push_back(bigIntWidth); + keysAggDist.push_back(keysAggUm[colUm]); + scaleAggDist.push_back(scaleAggUm[colUm]); + precisionAggDist.push_back(precisionAggUm[colUm]); + typeAggDist.push_back(typeAggUm[colUm]); + widthAggDist.push_back(widthAggUm[colUm]); } - break; - case ROWAGG_MIN: - case ROWAGG_MAX: - case ROWAGG_SUM: - case ROWAGG_AVG: - case ROWAGG_COUNT_ASTERISK: - case ROWAGG_COUNT_COL_NAME: - case ROWAGG_STATS: - case ROWAGG_BIT_AND: - case ROWAGG_BIT_OR: - case ROWAGG_BIT_XOR: - case ROWAGG_CONSTANT: - default: + // not a direct hit -- a returned column is not already in the RG from PMs + else { - AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + bool returnColMissing = true; - if (it != aggFuncMap.end()) + // check if a SUM or COUNT covered by AVG + if (aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) { - colUm = it->second; - oidsAggDist.push_back(oidsAggUm[colUm]); - keysAggDist.push_back(keysAggUm[colUm]); - scaleAggDist.push_back(scaleAggUm[colUm]); - precisionAggDist.push_back(precisionAggUm[colUm]); - typeAggDist.push_back(typeAggUm[colUm]); - widthAggDist.push_back(widthAggUm[colUm]); - colUm -= multiParms; - } + it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL)); - // not a direct hit -- a returned column is not already in the RG from PMs - else - { - bool returnColMissing = true; - - // check if a SUM or COUNT covered by AVG - if (aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) + if (it != aggFuncMap.end()) { - it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc)); + // false alarm + returnColMissing = false; - if (it != aggFuncMap.end()) + colUm = it->second; + + if (aggOp == ROWAGG_SUM) { - // false alarm - returnColMissing = false; + oidsAggDist.push_back(oidsAggUm[colUm]); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(scaleAggUm[colUm] >> 8); + precisionAggDist.push_back(precisionAggUm[colUm]); + typeAggDist.push_back(typeAggUm[colUm]); + widthAggDist.push_back(widthAggUm[colUm]); + } + else + { + // leave the count() to avg + aggOp = ROWAGG_COUNT_NO_OP; - colUm = it->second; - - if (aggOp == ROWAGG_SUM) + oidsAggDist.push_back(oidsAggUm[colUm]); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(0); + if (isUnsigned(typeAggUm[colUm])) { - oidsAggDist.push_back(oidsAggUm[colUm]); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(scaleAggUm[colUm] >> 8); - precisionAggDist.push_back(precisionAggUm[colUm]); - typeAggDist.push_back(typeAggUm[colUm]); - widthAggDist.push_back(widthAggUm[colUm]); + precisionAggDist.push_back(20); + typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); } else { - // leave the count() to avg - aggOp = ROWAGG_COUNT_NO_OP; - - oidsAggDist.push_back(oidsAggUm[colUm]); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(0); - - if (isUnsigned(typeAggUm[colUm])) - { - precisionAggDist.push_back(20); - typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); - } - else - { - precisionAggDist.push_back(19); - typeAggDist.push_back(CalpontSystemCatalog::BIGINT); - } - - widthAggDist.push_back(bigIntWidth); + precisionAggDist.push_back(19); + 
typeAggDist.push_back(CalpontSystemCatalog::BIGINT); } + widthAggDist.push_back(bigIntWidth); } } - else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), - retKey) != jobInfo.expressionVec.end()) - { - // a function on aggregation - TupleInfo ti = getTupleInfo(retKey, jobInfo); - oidsAggDist.push_back(ti.oid); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(ti.scale); - precisionAggDist.push_back(ti.precision); - typeAggDist.push_back(ti.dtype); - widthAggDist.push_back(ti.width); + } + else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), + retKey) != jobInfo.expressionVec.end()) + { + // a function on aggregation + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggDist.push_back(ti.oid); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(ti.scale); + precisionAggDist.push_back(ti.precision); + typeAggDist.push_back(ti.dtype); + widthAggDist.push_back(ti.width); - returnColMissing = false; - } - else if (jobInfo.windowSet.find(retKey) != jobInfo.windowSet.end()) - { - // a window function - TupleInfo ti = getTupleInfo(retKey, jobInfo); - oidsAggDist.push_back(ti.oid); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(ti.scale); - precisionAggDist.push_back(ti.precision); - typeAggDist.push_back(ti.dtype); - widthAggDist.push_back(ti.width); + returnColMissing = false; + } + else if (jobInfo.windowSet.find(retKey) != jobInfo.windowSet.end()) + { + // a window function + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggDist.push_back(ti.oid); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(ti.scale); + precisionAggDist.push_back(ti.precision); + typeAggDist.push_back(ti.dtype); + widthAggDist.push_back(ti.width); - returnColMissing = false; - } - else if (aggOp == ROWAGG_CONSTANT) - { - TupleInfo ti = getTupleInfo(retKey, jobInfo); - oidsAggDist.push_back(ti.oid); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(ti.scale); - precisionAggDist.push_back(ti.precision); - 
typeAggDist.push_back(ti.dtype); - widthAggDist.push_back(ti.width); + returnColMissing = false; + } + else if (aggOp == ROWAGG_CONSTANT) + { + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggDist.push_back(ti.oid); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(ti.scale); + precisionAggDist.push_back(ti.precision); + typeAggDist.push_back(ti.dtype); + widthAggDist.push_back(ti.width); - returnColMissing = false; - } + returnColMissing = false; + } - if (returnColMissing) - { - Message::Args args; - args.add(keyName(outIdx, retKey, jobInfo)); - string emsg = IDBErrorInfo::instance()-> - errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); - cerr << "prep2PhasesDistinctAggregate: " << emsg << " oid=" - << (int) jobInfo.keyInfo->tupleKeyVec[retKey].fId << ", alias=" - << jobInfo.keyInfo->tupleKeyVec[retKey].fTable << ", view=" - << jobInfo.keyInfo->tupleKeyVec[retKey].fView << ", function=" - << (int) aggOp << endl; - throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION); - } - } //else - } // switch - } + if (returnColMissing) + { + Message::Args args; + args.add(keyName(outIdx, retKey, jobInfo)); + string emsg = IDBErrorInfo::instance()-> + errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); + cerr << "prep2PhasesDistinctAggregate: " << emsg << " oid=" + << (int) jobInfo.keyInfo->tupleKeyVec[retKey].fId << ", alias=" + << jobInfo.keyInfo->tupleKeyVec[retKey].fTable << ", view=" + << jobInfo.keyInfo->tupleKeyVec[retKey].fView << ", function=" + << (int) aggOp << endl; + throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION); + } + } //else not a direct hit + } // else not a DISTINCT // update groupby vector if the groupby column is a returned column if (returnedColVec[i].second == 0) @@ -4757,7 +4772,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, outIdx)); @@ -4775,7 +4789,7 @@ void 
TupleAggregateStep::prep2PhasesDistinctAggregate( functionVecUm.push_back(funct); // find if this func is a duplicate - AGG_MAP::iterator iter = aggDupFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + AGG_MAP::iterator iter = aggDupFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (iter != aggDupFuncMap.end()) { @@ -4792,7 +4806,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } else { - aggDupFuncMap.insert(make_pair(boost::make_tuple(retKey, aggOp, pUDAFFunc), + aggDupFuncMap.insert(make_pair(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), funct->fOutputColumnIndex)); } @@ -4801,7 +4815,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } - ++outIdx; } // for (i @@ -5044,7 +5057,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( ++multiParms; continue; } - if (returnedColVec[k].first != distinctColKey) continue; @@ -5066,7 +5078,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex - multiParms)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -5092,7 +5104,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( ++multiParms; continue; } - // search non-distinct functions in functionVec vector::iterator it = functionVecUm.begin(); @@ -5110,7 +5121,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex - multiParms)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK || @@ -5131,7 +5142,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( 
f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex - multiParms)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 2fa2150bf..46d5ff7cd 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -3665,11 +3665,16 @@ ReturnedColumn* buildFunctionColumn( gwi.no_parm_func_list.push_back(fc); } - // add the sign for addtime function - if (funcName == "add_time") + // func name is addtime/subtime in 10.3.9 + // note: this means get_time() can now go away in our server fork + if ((funcName == "addtime") || (funcName == "subtime")) { - Item_func_add_time* addtime = (Item_func_add_time*)ifp; - sptp.reset(new ParseTree(new ConstantColumn((int64_t)addtime->get_sign()))); + int64_t sign = 1; + if (funcName == "subtime") + { + sign = -1; + } + sptp.reset(new ParseTree(new ConstantColumn(sign))); funcParms.push_back(sptp); } @@ -4853,7 +4858,6 @@ void gp_walk(const Item* item, void* arg) { gp_walk_info* gwip = reinterpret_cast(arg); idbassert(gwip); - bool isCached = false; //Bailout... if (gwip->fatalParseError) return; @@ -4866,15 +4870,17 @@ void gp_walk(const Item* item, void* arg) if (itype == Item::FUNC_ITEM && ((Item_func*)item)->functype() == Item_func::XOR_FUNC ) itype = Item::COND_ITEM; - if (item->type() == Item::CACHE_ITEM) - { - item = ((Item_cache*)item)->get_example(); - itype = item->type(); - isCached = true; - } - switch (itype) { + case Item::CACHE_ITEM: + { + // The item or condition is cached as per MariaDB server view but + // for InfiniDB it need to be parsed and executed. 
+ // MCOL-1188 and MCOL-1029 + Item* orig_item = ((Item_cache*)item)->get_example(); + orig_item->traverse_cond(gp_walk, gwip, Item::POSTFIX); + break; + } case Item::FIELD_ITEM: { Item_field* ifp = (Item_field*)item; @@ -5086,14 +5092,10 @@ void gp_walk(const Item* item, void* arg) cc->resultType(colType_MysqlToIDB(item)); } - // cached item comes in one piece - if (!isCached) - { - for (uint32_t i = 0; i < ifp->argument_count() && !gwip->rcWorkStack.empty(); i++) - { - gwip->rcWorkStack.pop(); - } - } + for (uint32_t i = 0; i < ifp->argument_count() && !gwip->rcWorkStack.empty(); i++) + { + gwip->rcWorkStack.pop(); + } // bug 3137. If filter constant like 1=0, put it to ptWorkStack // MariaDB bug 750. Breaks if compare is an argument to a function. @@ -5170,14 +5172,6 @@ void gp_walk(const Item* item, void* arg) bool isOr = (ftype == Item_func::COND_OR_FUNC); bool isXor = (ftype == Item_func::XOR_FUNC); - // MCOL-1029 A cached COND_ITEM is something like: - // AND (TRUE OR FALSE) - // We can skip it - if (isCached) - { - break; - } - List* argumentList; List xorArgumentList; diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 0c57ce8bc..f4a95bbc3 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -323,6 +323,9 @@ string ConvertFuncName(Item_sum* item) case Item_sum::LAG_FUNC: return "LAG"; break; + default: + // We just don't handle it. 
+ break; }; return ""; diff --git a/dbcon/mysql/install_calpont_mysql.sh b/dbcon/mysql/install_calpont_mysql.sh index e81394cab..40135cc78 100755 --- a/dbcon/mysql/install_calpont_mysql.sh +++ b/dbcon/mysql/install_calpont_mysql.sh @@ -86,7 +86,14 @@ CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.so'; CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemready RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemreadonly RETURNS INTEGER soname 'libcalmysql.so'; -CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libudf_mysql.so'; +CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libregr_mysql.so'; +CREATE AGGREGATE FUNCTION regr_avgy RETURNS REAL soname 'libregr_mysql.so'; +CREATE AGGREGATE FUNCTION regr_count RETURNS INTEGER soname 'libregr_mysql.so'; +CREATE AGGREGATE FUNCTION regr_slope RETURNS REAL soname 'libregr_mysql.so'; +CREATE AGGREGATE FUNCTION regr_intercept RETURNS REAL soname 'libregr_mysql.so'; +CREATE AGGREGATE FUNCTION regr_r2 RETURNS REAL soname 'libregr_mysql.so'; + +CREATE AGGREGATE FUNCTION distinct_count RETURNS INTEGER soname 'libudf_mysql.so'; CREATE DATABASE IF NOT EXISTS infinidb_vtable; CREATE DATABASE IF NOT EXISTS infinidb_querystats; diff --git a/dbcon/mysql/sm.h b/dbcon/mysql/sm.h index 65cf35123..dafa64419 100644 --- a/dbcon/mysql/sm.h +++ b/dbcon/mysql/sm.h @@ -64,7 +64,7 @@ const int CALPONT_INTERNAL_ERROR = -1007; //extern std::ofstream smlog; //#define SMDEBUGLOG smlog //#else -#define SMDEBUGLOG if (true) std::cerr +#define SMDEBUGLOG if (false) std::cout //#endif extern const std::string DEFAULT_SAVE_PATH; diff --git a/ddlproc/ddlproc.cpp b/ddlproc/ddlproc.cpp index 4fbee7843..6f827f8fd 100644 --- a/ddlproc/ddlproc.cpp +++ b/ddlproc/ddlproc.cpp @@ -138,8 +138,30 @@ int main(int argc, char* argv[]) { oam.processInitComplete("DDLProc", ACTIVE); } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + LoggingID logid(23, 0, 0); + 
logging::Message::Args args1; + logging::Message msg(1); + args1.add("DDLProc init caught exception: "); + args1.add(ex.what()); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; + } catch (...) { + cerr << "Caught unknown exception in init!" << endl; + LoggingID logid(23, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DDLProc init caught unknown exception"); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; } } @@ -150,21 +172,28 @@ int main(int argc, char* argv[]) catch (std::exception& ex) { cerr << ex.what() << endl; + LoggingID logid(23, 0, 0); Message::Args args; Message message(8); args.add("DDLProc failed on: "); args.add(ex.what()); message.format( args ); - + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, message, logid); + return 1; } catch (...) { cerr << "Caught unknown exception!" 
<< endl; + LoggingID logid(23, 0, 0); Message::Args args; Message message(8); args.add("DDLProc failed on: "); - args.add("receiving DDLPackage"); + args.add("receiving DDLPackage (unknown exception)"); message.format( args ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, message, logid); + return 1; } return 0; diff --git a/dmlproc/dmlproc.cpp b/dmlproc/dmlproc.cpp index 5fa99be39..deb936422 100644 --- a/dmlproc/dmlproc.cpp +++ b/dmlproc/dmlproc.cpp @@ -535,8 +535,30 @@ int main(int argc, char* argv[]) // At first we set to BUSY_INIT oam.processInitComplete("DMLProc", oam::BUSY_INIT); } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + LoggingID logid(21, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DMLProc init caught exception: "); + args1.add(ex.what()); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; + } catch (...) { + cerr << "Caught unknown exception in init!" << endl; + LoggingID logid(21, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DMLProc init caught unknown exception"); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; } //@Bug 1627 @@ -618,8 +640,30 @@ int main(int argc, char* argv[]) { oam.processInitComplete("DMLProc", ACTIVE); } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + LoggingID logid(21, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DMLProc init caught exception: "); + args1.add(ex.what()); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; + } catch (...) { + cerr << "Caught unknown exception in init!" 
<< endl; + LoggingID logid(21, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DMLProc init caught unknown exception"); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; } Dec = DistributedEngineComm::instance(rm); diff --git a/dmlproc/dmlprocessor.cpp b/dmlproc/dmlprocessor.cpp index f143ced83..861198d45 100644 --- a/dmlproc/dmlprocessor.cpp +++ b/dmlproc/dmlprocessor.cpp @@ -1225,8 +1225,28 @@ void DMLServer::start() cancelThread.join(); } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + logging::LoggingID lid(21); + Message::Args args; + Message message(8); + args.add("DMLProc init caught exception: "); + args.add(ex.what()); + message.format(args); + logging::Logger logger(lid.fSubsysID); + logger.logMessage(logging::LOG_TYPE_CRITICAL, message, lid); + } catch (...) { + cerr << "Caught unknown exception!" << endl; + logging::LoggingID lid(21); + Message::Args args; + Message message(8); + args.add("DMLProc init caught unknown exception"); + message.format(args); + logging::Logger logger(lid.fSubsysID); + logger.logMessage(logging::LOG_TYPE_CRITICAL, message, lid); } } diff --git a/exemgr/main.cpp b/exemgr/main.cpp index 7f91d1d4a..a403360ac 100644 --- a/exemgr/main.cpp +++ b/exemgr/main.cpp @@ -105,6 +105,7 @@ using namespace querytele; #include "dbrm.h" #endif + #include "installdir.h" namespace @@ -1391,8 +1392,34 @@ void cleanTempDir() assert(tmpPrefix != "/"); /* This is quite scary as ExeMgr usually runs as root */ - boost::filesystem::remove_all(tmpPrefix); - boost::filesystem::create_directories(tmpPrefix); + try + { + boost::filesystem::remove_all(tmpPrefix); + boost::filesystem::create_directories(tmpPrefix); + } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + LoggingID logid(16, 0, 0); + Message::Args args; + Message message(8); + args.add("Execption whilst cleaning tmpdir: "); + args.add(ex.what()); + 
message.format( args ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_WARNING, message, logid); + } + catch (...) + { + cerr << "Caught unknown exception during tmpdir cleanup" << endl; + LoggingID logid(16, 0, 0); + Message::Args args; + Message message(8); + args.add("Unknown execption whilst cleaning tmpdir"); + message.format( args ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_WARNING, message, logid); + } } diff --git a/genii.vpw b/genii.vpw index 69e258339..686b01ed7 100644 --- a/genii.vpw +++ b/genii.vpw @@ -44,6 +44,7 @@ + diff --git a/oam/install_scripts/columnstoreAlias b/oam/install_scripts/columnstoreAlias index cd225c1a9..255eb7e7e 100644 --- a/oam/install_scripts/columnstoreAlias +++ b/oam/install_scripts/columnstoreAlias @@ -10,5 +10,8 @@ alias core='cd /var/log/mariadb/columnstore/corefiles' alias tmsg='tail -f /var/log/messages' alias tdebug='tail -f /var/log/mariadb/columnstore/debug.log' alias tinfo='tail -f /var/log/mariadb/columnstore/info.log' +alias terror='tail -f /var/log/mariadb/columnstore/err.log' +alias twarning='tail -f /var/log/mariadb/columnstore/warning.log' +alias tcrit='tail -f /var/log/mariadb/columnstore/crit.log' alias dbrm='cd /usr/local/mariadb/columnstore/data1/systemFiles/dbrm' alias module='cat /usr/local/mariadb/columnstore/local/module' diff --git a/oam/install_scripts/post-mysql-install b/oam/install_scripts/post-mysql-install index d02088fff..6f99b2284 100755 --- a/oam/install_scripts/post-mysql-install +++ b/oam/install_scripts/post-mysql-install @@ -74,9 +74,11 @@ fi if [ -f $installdir/lib/libcalmysql.so.1.0.0 ]; then libcalmysql=$installdir/lib/libcalmysql.so.1.0.0 libudfsdk=$installdir/lib/libudf_mysql.so.1.0.0 + libregrsdk=$installdir/lib/libregr_mysql.so.1.0.0 elif [ -f $installdir/lib/libcalmysql.so.1 ]; then libcalmysql=$installdir/lib/libcalmysql.so.1 libudfsdk=$installdir/lib/libudf_mysql.so.1 + libregrsdk=$installdir/lib/libregr_mysql.so.1 else 
libcalmysql= fi @@ -86,6 +88,7 @@ if [ -d $installdir/mysql/lib64/mysql/plugin -a -n "$libcalmysql" ]; then ln -sf $libcalmysql libcalmysql.so ln -sf $libcalmysql libcalmysqlent.so ln -sf $libudfsdk libudf_mysql.so + ln -sf $libregrsdk libregr_mysql.so fi if [ $installdir != "/usr/local/mariadb/columnstore" ]; then diff --git a/oam/install_scripts/post-mysqld-install b/oam/install_scripts/post-mysqld-install index 58f2b3d65..e712b2813 100755 --- a/oam/install_scripts/post-mysqld-install +++ b/oam/install_scripts/post-mysqld-install @@ -83,6 +83,7 @@ chown -R $user.$user $installdir/mysql if [ -f $installdir/lib/libcalmysql.so.1.0.0 ]; then libcalmysql=$installdir/lib/libcalmysql.so.1.0.0 libudfsdk=$installdir/lib/libudf_mysql.so.1.0.0 + libregrsdk=$installdir/lib/libregr_mysql.so.1.0.0 is_columnstore_tables=$installdir/lib/is_columnstore_tables.so.1.0.0 is_columnstore_columns=$installdir/lib/is_columnstore_columns.so.1.0.0 is_columnstore_extents=$installdir/lib/is_columnstore_extents.so.1.0.0 @@ -90,6 +91,7 @@ if [ -f $installdir/lib/libcalmysql.so.1.0.0 ]; then elif [ -f $installdir/lib/libcalmysql.so.1 ]; then libcalmysql=$installdir/lib/libcalmysql.so.1 libudfsdk=$installdir/lib/libudf_mysql.so.1 + libregrsdk=$installdir/lib/libregr_mysql.so.1 is_columnstore_tables=$installdir/lib/is_columnstore_tables.so.1 is_columnstore_columns=$installdir/lib/is_columnstore_columns.so.1 is_columnstore_extents=$installdir/lib/is_columnstore_extents.so.1 @@ -104,6 +106,7 @@ if [ -n "$libcalmysql" ]; then ln -sf $libcalmysql libcalmysql.so ln -sf $libcalmysql libcalmysqlent.so ln -sf $libudfsdk libudf_mysql.so + ln -sf $libregrsdk libregr_mysql.so ln -sf $is_columnstore_tables is_columnstore_tables.so ln -sf $is_columnstore_columns is_columnstore_columns.so ln -sf $is_columnstore_extents is_columnstore_extents.so diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index 73bf5c9d2..dcb1671ba 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ 
-8274,8 +8274,7 @@ std::string Oam::getEC2LocalInstance(std::string name) string file = tmpdir + "/getInstanceInfo_" + name; string cmd = InstallDir + "/bin/MCSInstanceCmds.sh getInstance > " + file; int status = system(cmd.c_str()); - - if (WEXITSTATUS(status) != 0 ) + if (WEXITSTATUS(status) == 1 ) return "failed"; // get Instance Name @@ -8308,8 +8307,7 @@ std::string Oam::getEC2LocalInstanceType(std::string name) string file = tmpdir + "/getInstanceType_" + name; string cmd = InstallDir + "/bin/MCSInstanceCmds.sh getType > " + file; int status = system(cmd.c_str()); - - if (WEXITSTATUS(status) != 0 ) + if (WEXITSTATUS(status) == 1 ) return "failed"; // get Instance Name @@ -8342,8 +8340,7 @@ std::string Oam::getEC2LocalInstanceSubnet(std::string name) string file = tmpdir + "/getInstanceSubnet_" + name; string cmd = InstallDir + "/bin/MCSInstanceCmds.sh getSubnet > " + file; int status = system(cmd.c_str()); - - if (WEXITSTATUS(status) != 0 ) + if (WEXITSTATUS(status) == 1 ) return "failed"; // get Instance Name @@ -8377,8 +8374,7 @@ std::string Oam::launchEC2Instance( const std::string name, const std::string IP string file = tmpdir + "/getInstance_" + name; string cmd = InstallDir + "/bin/MCSInstanceCmds.sh launchInstance " + IPAddress + " " + type + " " + group + " > " + file; int status = system(cmd.c_str()); - - if (WEXITSTATUS(status) != 0 ) + if (WEXITSTATUS(status) == 1 ) return "failed"; if (checkLogStatus(file, "Required") ) @@ -8455,8 +8451,7 @@ bool Oam::startEC2Instance(std::string instanceName) // run script to get Instance status and IP Address string cmd = InstallDir + "/bin/MCSInstanceCmds.sh startInstance " + instanceName + " > " + tmpdir + "/startEC2Instance_" + instanceName; int ret = system(cmd.c_str()); - - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) return false; return true; @@ -8475,8 +8470,7 @@ bool Oam::assignElasticIP(std::string instanceName, std::string IpAddress) // run script to get Instance status and IP Address 
string cmd = InstallDir + "/bin/MCSInstanceCmds.sh assignElasticIP " + instanceName + " " + IpAddress + " > " + tmpdir + "/assignElasticIP_" + instanceName; int ret = system(cmd.c_str()); - - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) exceptionControl("assignElasticIP", oam::API_FAILURE); return true; @@ -8495,8 +8489,7 @@ bool Oam::deassignElasticIP(std::string IpAddress) // run script to get Instance status and IP Address string cmd = InstallDir + "/bin/MCSInstanceCmds.sh deassignElasticIP " + IpAddress + " > " + tmpdir + "/deassignElasticIP_" + IpAddress; int ret = system(cmd.c_str()); - - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) exceptionControl("deassignElasticIP", oam::API_FAILURE); return true; @@ -8515,9 +8508,9 @@ std::string Oam::getEC2VolumeStatus(std::string volumeName) // run script to get Volume Status string cmd = InstallDir + "/bin/MCSVolumeCmds.sh describe " + volumeName + " > " + tmpdir + "/getVolumeStatus_" + volumeName; int ret = system(cmd.c_str()); - - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ){ return "failed"; + } // get status string status; @@ -8550,8 +8543,7 @@ std::string Oam::createEC2Volume(std::string size, std::string name) string file = tmpdir + "/createVolumeStatus_" + name; string cmd = InstallDir + "/bin/MCSVolumeCmds.sh create " + size + " " + name + " > " + file; int ret = system(cmd.c_str()); - - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) return "failed"; // get status @@ -8598,12 +8590,16 @@ bool Oam::attachEC2Volume(std::string volumeName, std::string deviceName, std::s string cmd = InstallDir + "/bin/MCSVolumeCmds.sh attach " + volumeName + " " + instanceName + " " + deviceName + " > " + tmpdir + "/attachVolumeStatus_" + volumeName; ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) == 0 ) - return true; + if (WEXITSTATUS(ret) == 1 ) + { + //failing to attach, dettach and retry + writeLog("attachEC2Volume: Attach failed, call detach:" + volumeName + " " + 
instanceName + " " + deviceName, LOG_TYPE_ERROR ); - //failing to attach, dettach and retry - detachEC2Volume(volumeName); - } + detachEC2Volume(volumeName); + } + else + return true; + } if (ret == 0 ) return true; @@ -8624,8 +8620,7 @@ bool Oam::detachEC2Volume(std::string volumeName) // run script to attach Volume string cmd = InstallDir + "/bin/MCSVolumeCmds.sh detach " + volumeName + " > " + tmpdir + "/detachVolumeStatus_" + volumeName; int ret = system(cmd.c_str()); - - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) return false; return true; @@ -8644,8 +8639,7 @@ bool Oam::deleteEC2Volume(std::string volumeName) // run script to delete Volume string cmd = InstallDir + "/bin/MCSVolumeCmds.sh delete " + volumeName + " > " + tmpdir + "/deleteVolumeStatus_" + volumeName; int ret = system(cmd.c_str()); - - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) return false; return true; @@ -8664,8 +8658,7 @@ bool Oam::createEC2tag(std::string resourceName, std::string tagName, std::strin // run script to create a tag string cmd = InstallDir + "/bin/MCSVolumeCmds.sh createTag " + resourceName + " " + tagName + " " + tagValue + " > " + tmpdir + "createTagStatus_" + resourceName; int ret = system(cmd.c_str()); - - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) return false; return true; diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp index 20ec493a5..87e5b9a30 100644 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ b/oamapps/mcsadmin/mcsadmin.cpp @@ -7864,6 +7864,7 @@ int processCommand(string* arguments) { try { + cout << endl << " Starting Modules" << endl; oam.startModule(devicenetworklist, ackTemp); @@ -7873,6 +7874,7 @@ int processCommand(string* arguments) sleep(15); cout << " Successful start of Modules " << endl; + } catch (exception& e) { diff --git a/primitives/blockcache/filebuffermgr.cpp b/primitives/blockcache/filebuffermgr.cpp index 65ad2b8f0..013ef7603 100644 --- a/primitives/blockcache/filebuffermgr.cpp +++ 
b/primitives/blockcache/filebuffermgr.cpp @@ -120,7 +120,10 @@ void FileBufferMgr::flushCache() // the block pool should not be freed in the above block to allow us // to continue doing concurrent unprotected-but-"safe" memcpys // from that memory - + if (fReportFrequency) + { + fLog << "Clearing entire cache" << endl; + } fFBPool.clear(); // fFBPool.reserve(fMaxNumBlocks); } @@ -154,7 +157,15 @@ void FileBufferMgr::flushMany(const LbidAtVer* laVptr, uint32_t cnt) BRM::LBID_t lbid; BRM::VER_t ver; filebuffer_uset_iter_t iter; - + if (fReportFrequency) + { + fLog << "flushMany " << cnt << " items: "; + for (uint32_t j = 0; j < cnt; j++) + { + fLog << "lbid: " << laVptr[j].LBID << " ver: " << laVptr[j].Ver << ", "; + } + fLog << endl; + } for (uint32_t j = 0; j < cnt; j++) { lbid = static_cast(laVptr->LBID); @@ -163,6 +174,10 @@ void FileBufferMgr::flushMany(const LbidAtVer* laVptr, uint32_t cnt) if (iter != fbSet.end()) { + if (fReportFrequency) + { + fLog << "flushMany hit, lbid: " << lbid << " index: " << iter->poolIdx << endl; + } //remove it from fbList uint32_t idx = iter->poolIdx; fbList.erase(fFBPool[idx].listLoc()); @@ -186,6 +201,16 @@ void FileBufferMgr::flushManyAllversion(const LBID_t* laVptr, uint32_t cnt) mutex::scoped_lock lk(fWLock); + if (fReportFrequency) + { + fLog << "flushManyAllversion " << cnt << " items: "; + for (uint32_t i = 0; i < cnt; i++) + { + fLog << laVptr[i] << ", "; + } + fLog << endl; + } + if (fCacheSize == 0 || cnt == 0) return; @@ -196,6 +221,10 @@ void FileBufferMgr::flushManyAllversion(const LBID_t* laVptr, uint32_t cnt) { if (uniquer.find(it->lbid) != uniquer.end()) { + if (fReportFrequency) + { + fLog << "flushManyAllversion hit: " << it->lbid << " index: " << it->poolIdx << endl; + } const uint32_t idx = it->poolIdx; fbList.erase(fFBPool[idx].listLoc()); fEmptyPoolSlots.push_back(idx); @@ -222,6 +251,16 @@ void FileBufferMgr::flushOIDs(const uint32_t* oids, uint32_t count) pair itList; filebuffer_uset_t::iterator it; + if 
(fReportFrequency) + { + fLog << "flushOIDs " << count << " items: "; + for (uint32_t i = 0; i < count; i++) + { + fLog << oids[i] << ", "; + } + fLog << endl; + } + // If there are more than this # of extents to drop, the whole cache will be cleared const uint32_t clearThreshold = 50000; @@ -286,6 +325,22 @@ void FileBufferMgr::flushPartition(const vector& oids, const set::iterator sit; + fLog << "flushPartition oids: "; + for (uint32_t i = 0; i < count; i++) + { + fLog << oids[i] << ", "; + } + fLog << "flushPartition partitions: "; + for (sit = partitions.begin(); sit != partitions.end(); ++sit) + { + fLog << (*sit).toString() << ", "; + } + fLog << endl; + } + if (fCacheSize == 0 || oids.size() == 0 || partitions.size() == 0) return; @@ -554,7 +609,7 @@ int FileBufferMgr::insert(const BRM::LBID_t lbid, const BRM::VER_t ver, const ui { struct timespec tm; clock_gettime(CLOCK_MONOTONIC, &tm); - fLog + fLog << "insert: " << left << fixed << ((double)(tm.tv_sec + (1.e-9 * tm.tv_nsec))) << " " << right << setw(12) << fBlksLoaded << " " << right << setw(12) << fBlksNotUsed << endl; @@ -743,9 +798,13 @@ int FileBufferMgr::bulkInsert(const vector& ops) mutex::scoped_lock lk(fWLock); - for (i = 0; i < ops.size(); i++) + if (fReportFrequency) { - const CacheInsert_t& op = ops[i]; + fLog << "bulkInsert: "; + } + + for (i = 0; i < ops.size(); i++) { + const CacheInsert_t &op = ops[i]; if (gPMProfOn && gPMStatsPtr) #ifdef _MSC_VER @@ -770,7 +829,10 @@ int FileBufferMgr::bulkInsert(const vector& ops) continue; } - //cout << "FBM: inserting <" << op.lbid << ", " << op.ver << endl; + if (fReportFrequency) + { + fLog << op.lbid << " " << op.ver << ", "; + } fCacheSize++; fBlksLoaded++; FBData_t fbdata = {op.lbid, op.ver, 0}; @@ -790,7 +852,10 @@ int FileBufferMgr::bulkInsert(const vector& ops) #endif ret++; } - + if (fReportFrequency) + { + fLog << endl; + } idbassert(fCacheSize <= maxCacheSize()); return ret; diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 
83f41f561..640b7f630 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1615,7 +1615,7 @@ void pingDeviceThread() if (moduleInfoList[moduleName] >= ModuleHeartbeatCount || opState == oam::DOWN || opState == oam::AUTO_DISABLED) { - log.writeLog(__LINE__, "Module alive, bring it back online: " + moduleName, LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "*** Module alive, bring it back online: " + moduleName, LOG_TYPE_DEBUG); string PrimaryUMModuleName = config.moduleName(); @@ -2087,7 +2087,7 @@ void pingDeviceThread() { //Log failure, issue alarm, set moduleOpState Configuration config; - log.writeLog(__LINE__, "module is down: " + moduleName, LOG_TYPE_CRITICAL); + log.writeLog(__LINE__, "*** module is down: " + moduleName, LOG_TYPE_CRITICAL); //set query system state not ready processManager.setQuerySystemState(false); @@ -2171,9 +2171,6 @@ void pingDeviceThread() // resume the dbrm oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - - //set recycle process - processManager.recycleProcess(moduleName); } // return values = 'ip address' for running or rebooting, stopped or terminated diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 30e69c77f..45df060fb 100644 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -630,10 +630,12 @@ void processMSG(messageqcpp::IOSocket* cfIos) if ( count > 0 ) { + string module = oam::UnassignedName; for (int i = 0; i < count; i++) { msg >> value; devicenetworkconfig.DeviceName = value; + module = value; msg >> value; devicenetworkconfig.UserTempDeviceName = value; msg >> value; @@ -663,24 +665,21 @@ void processMSG(messageqcpp::IOSocket* cfIos) if ( status == API_SUCCESS) { - //distribute config file - processManager.distributeConfigFile("system"); + processManager.setSystemState(oam::BUSY_INIT); - //call dbrm control - oam.dbrmctl("halt"); - log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); + //set query system state not ready + 
processManager.setQuerySystemState(false); - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); + //set recycle process + processManager.recycleProcess(target, true); - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); + //distribute config file + processManager.distributeConfigFile("system"); + + //set query system state ready + processManager.setQuerySystemState(true); -// processManager.restartProcessType("ExeMgr"); - - //setup MySQL Replication for started modules -// log.writeLog(__LINE__, "Setup MySQL Replication for module being started", LOG_TYPE_DEBUG); -// processManager.setMySQLReplication(startdevicenetworklist); + processManager.setSystemState(oam::ACTIVE); } } else @@ -923,7 +922,6 @@ void processMSG(messageqcpp::IOSocket* cfIos) //set query system state ready processManager.setQuerySystemState(true); - } else { @@ -1772,7 +1770,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) oam::DeviceNetworkList devicenetworklist; pthread_t startsystemthread; - pthread_create (&startsystemthread, NULL, (void* (*)(void*)) &startSystemThread, &devicenetworklist); + status = pthread_create (&startsystemthread, NULL, (void*(*)(void*)) &startSystemThread, &devicenetworklist); if ( status != 0 ) { @@ -1782,20 +1780,19 @@ void processMSG(messageqcpp::IOSocket* cfIos) if (status == 0 && ackIndicator) { - // BUG 4554 We don't need the join because calpont console is now looking for "Active" - // We need to return the ack right away to let console know we got the message. 
-// pthread_join(startsystemthread, NULL); -// status = startsystemthreadStatus; + pthread_join(startsystemthread, NULL); + status = startsystemthreadStatus; } - - // setup MySQL Replication after switchover command - /* if (graceful == FORCEFUL) + + // setup MySQL Replication after FORCE restart command + if ( (status == API_SUCCESS) && + (graceful == oam::FORCEFUL) ) { - log.writeLog(__LINE__, "Setup MySQL Replication for restartSystem FORCE, used by switch-parent command", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "Setup MySQL Replication for restartSystem FORCE", LOG_TYPE_DEBUG); oam::DeviceNetworkList devicenetworklist; - processManager.setMySQLReplication(devicenetworklist); + processManager.setMySQLReplication(devicenetworklist, oam::UnassignedName, true); } - */ + log.writeLog(__LINE__, "RESTARTSYSTEM: Start System Request Completed", LOG_TYPE_INFO); } @@ -3065,15 +3062,16 @@ void processMSG(messageqcpp::IOSocket* cfIos) processManager.reinitProcessType("cpimport"); //request reinit after Process is active - for ( int i = 0; i < 600 ; i++ ) - { - try - { - ProcessStatus procstat; - oam.getProcessStatus(processName, moduleName, procstat); + for ( int i = 0; i < 10 ; i++ ) { + try { + ProcessStatus procstat; + oam.getProcessStatus(processName, moduleName, procstat); - if (procstat.ProcessOpState == oam::ACTIVE) - { + if (procstat.ProcessOpState == oam::COLD_STANDBY) + break; + + if ( (procstat.ProcessOpState == oam::ACTIVE) || + (procstat.ProcessOpState == oam::STANDBY) ) { // if a PrimProc was restarted, reinit ACTIVE ExeMgr(s) and DDL/DMLProc if ( processName == "PrimProc") { @@ -3160,11 +3158,14 @@ void processMSG(messageqcpp::IOSocket* cfIos) } - // if a DDLProc was restarted, reinit DMLProc + // if a DDLProc was restarted, restart DMLProc if ( processName == "DDLProc") { processManager.reinitProcessType("DMLProc"); + //set query system states ready processManager.setQuerySystemState(true); + + processManager.setSystemState(oam::ACTIVE); } //only run on 
auto process restart @@ -3222,6 +3223,8 @@ void processMSG(messageqcpp::IOSocket* cfIos) processManager.setQuerySystemState(true); processManager.setSystemState(oam::ACTIVE); + + log.writeLog(__LINE__, "MSG RECEIVED: Process Restarted Completed"); } break; @@ -3604,6 +3607,7 @@ int ProcessManager::shutdownModule(string target, ByteStream::byte actionIndicat int ProcessManager::disableModule(string target, bool manualFlag) { Oam oam; + ProcessManager processManager(config, log); ModuleConfig moduleconfig; log.writeLog(__LINE__, "disableModule request for " + target, LOG_TYPE_DEBUG); @@ -3719,6 +3723,11 @@ int ProcessManager::disableModule(string target, bool manualFlag) if ( updateWorkerNodeconfig() != API_SUCCESS ) return API_FAILURE; + processManager.recycleProcess(target); + + //check for SIMPLEX Processes on mate might need to be started + processManager.checkSimplexModule(target); + //distribute config file distributeConfigFile("system"); @@ -3730,7 +3739,7 @@ int ProcessManager::disableModule(string target, bool manualFlag) /****************************************************************************************** * @brief recycleProcess * -* purpose: recyle process, generally after some disable module is run +* purpose: recyle process, done after disable/enable module * ******************************************************************************************/ void ProcessManager::recycleProcess(string module, bool enableModule) @@ -3750,53 +3759,40 @@ void ProcessManager::recycleProcess(string module, bool enableModule) } catch (...) 
{} - // restart DBRM Process and DMLProc and return if enable module is being done - if (enableModule) - { - //recycle DBRM processes in all cases - restartProcessType("DBRMControllerNode"); - restartProcessType("DBRMWorkerNode"); + stopProcessType("WriteEngineServer"); - restartProcessType("DMLProc"); - return; - } + stopProcessType("ExeMgr"); + + stopProcessType("PrimProc"); - //recycle DBRM processes in all cases - restartProcessType("DBRMControllerNode", module); - restartProcessType("DBRMWorkerNode"); + stopProcessType("DBRMControllerNode"); + stopProcessType("DBRMWorkerNode"); - // only recycle dmlproc, if down/up module is non-parent UM - if ( ( moduleType == "um" ) && - ( PrimaryUMModuleName != module) ) - { - restartProcessType("DMLProc", module); - return; - } + stopProcessType("DDLProc"); + stopProcessType("DMLProc"); - if ( PrimaryUMModuleName == module) - { - stopProcessType("DDLProc"); - stopProcessType("DMLProc"); - } + stopProcessType("mysqld"); - stopProcessType("ExeMgr"); +// restartProcessType("mysqld"); + + startProcessType("DBRMControllerNode"); + startProcessType("DBRMWorkerNode"); - restartProcessType("PrimProc"); - sleep(1); - - restartProcessType("mysqld"); - - restartProcessType("WriteEngineServer"); - sleep(1); + startProcessType("PrimProc"); + sleep(5); + + startProcessType("WriteEngineServer"); + sleep(3); startProcessType("ExeMgr"); - sleep(1); startProcessType("DDLProc"); sleep(1); startProcessType("DMLProc"); + startProcessType("mysqld"); + return; } @@ -3844,11 +3840,7 @@ int ProcessManager::enableModule(string target, int state, bool failover) if ( newStandbyModule == target) setStandbyModule(newStandbyModule); - - //set recycle process - if (!failover) - recycleProcess(target); - + log.writeLog(__LINE__, "enableModule request for " + target + " completed", LOG_TYPE_DEBUG); return API_SUCCESS; @@ -7091,17 +7083,27 @@ void ProcessManager::setQuerySystemState(bool set) Oam oam; BRM::DBRM dbrm; - log.writeLog(__LINE__, 
"setQuerySystemState = " + oam.itoa(set), LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setQuerySystemState called = " + oam.itoa(set), LOG_TYPE_DEBUG); try { dbrm.setSystemQueryReady(set); - log.writeLog(__LINE__, "setQuerySystemState successful", LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "setQuerySystemState failed", LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "setQuerySystemState failed", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "setSystemQueryReady = " + oam.itoa(set), LOG_TYPE_DEBUG); + + try { + dbrm.setSystemReady(set); + log.writeLog(__LINE__, "setSystemReady = " + oam.itoa(set), LOG_TYPE_DEBUG); + } + catch(...) + { + log.writeLog(__LINE__, "setSystemReady failed", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setSystemReady failed", LOG_TYPE_ERROR); + } + } + catch(...) + { + log.writeLog(__LINE__, "setSystemQueryReady failed", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setSystemQueryReady failed", LOG_TYPE_ERROR); } } @@ -7706,25 +7708,30 @@ void startSystemThread(oam::DeviceNetworkList Devicenetworklist) if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) { - rtn = oam::ACTIVE; + rtn = oam::ACTIVE; break; } if (DMLprocessstatus.ProcessOpState == oam::FAILED) { - rtn = oam::FAILED; + rtn = oam::FAILED; + status = oam::API_FAILURE; break; } - // wait some more - sleep(2); - } + // wait some more + sleep(2); + } + + if ( rtn = oam::ACTIVE ) + //set query system state not ready + processManager.setQuerySystemState(true); - processManager.setSystemState(rtn); + processManager.setSystemState(rtn); } + else + processManager.setSystemState(oam::FAILED); - //set query system state ready - processManager.setQuerySystemState(true); // exit thread log.writeLog(__LINE__, "startSystemThread Exit", LOG_TYPE_DEBUG); diff --git a/procmon/main.cpp b/procmon/main.cpp index dac166bce..6d7406ee1 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -268,7 +268,7 @@ int main(int argc, char** argv) } catch (...) 
{} - if ( cloud == "amazon-ec2" ) + if ( cloud == "amazon-ec2" || cloud == "amazon-vpc" ) { if (!aMonitor.amazonIPCheck()) { @@ -1565,7 +1565,7 @@ static void chldHandleThread(MonitorConfig config) catch (...) {} - // check if process failover is needed due to process outage + // check if Mdoule failover is needed due to process outage aMonitor.checkModuleFailover((*listPtr).ProcessName); //check the db health @@ -1647,17 +1647,20 @@ static void chldHandleThread(MonitorConfig config) restartStatus = " restart failed with hard failure, don't retry!!"; (*listPtr).processID = 0; - // check if process failover is needed due to process outage + // check if Module failover is needed due to process outage aMonitor.checkModuleFailover((*listPtr).ProcessName); break; } else { if ( (*listPtr).processID != oam::API_MINOR_FAILURE ) + { //restarted successful + //Inform Process Manager that Process restart + aMonitor.processRestarted( (*listPtr).ProcessName, false); break; - } - + } + } // restart failed with minor error, sleep and try sleep(5); } @@ -2602,6 +2605,7 @@ void processStatusMSG(messageqcpp::IOSocket* cfIos) BRM::DBRM dbrm; dbrm.setSystemQueryReady(true); } + } break; diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index bc06a35fe..a406b3f1e 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -665,9 +665,6 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO else log.writeLog(__LINE__, "START: process already active " + processName); - //Inform Process Manager that Process restart - //processRestarted(processName); - ackMsg << (ByteStream::byte) ACK; ackMsg << (ByteStream::byte) START; ackMsg << (ByteStream::byte) requestStatus; @@ -772,9 +769,6 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO requestStatus = API_FAILURE; } - //Inform Process Manager that Process restart - //processRestarted(processName); - ackMsg << (ByteStream::byte) ACK; ackMsg << 
(ByteStream::byte) RESTART; ackMsg << (ByteStream::byte) requestStatus; @@ -4974,7 +4968,6 @@ void ProcessMonitor::checkModuleFailover( std::string processName) { // found a AVAILABLE mate, start it log.writeLog(__LINE__, "Change UM Master to module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "Disable local UM module " + config.moduleName(), LOG_TYPE_DEBUG); log.writeLog(__LINE__, "Stop local UM module " + config.moduleName(), LOG_TYPE_DEBUG); log.writeLog(__LINE__, "Disable Local will Enable UM module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG); @@ -5845,7 +5838,6 @@ bool ProcessMonitor::amazonIPCheck() log.writeLog(__LINE__, "Assign Elastic IP Address failed : '" + moduleName + "' / '" + ELIPaddress, LOG_TYPE_ERROR); break; } - break; } @@ -6021,8 +6013,13 @@ bool ProcessMonitor::amazonVolumeCheck(int dbrootID) if (oam.attachEC2Volume(volumeName, deviceName, instanceName)) { - string cmd = "mount " + startup::StartUp::installDir() + "/data" + oam.itoa(dbrootID) + " > /dev/null 2>&1"; + + log.writeLog(__LINE__, "amazonVolumeCheck function , volume to attached: " + volumeName, LOG_TYPE_DEBUG); + + string cmd = "mount " + startup::StartUp::installDir() + "/data" + oam.itoa(dbrootID) + " > /dev/null"; + system(cmd.c_str()); + log.writeLog(__LINE__, "amazonVolumeCheck function , volume to mounted: " + volumeName, LOG_TYPE_DEBUG); return true; } else diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index dba39fe86..c4486ddd5 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -25,3 +25,5 @@ add_subdirectory(thrift) add_subdirectory(querytele) add_subdirectory(clusterTester) add_subdirectory(libmysql_client) +add_subdirectory(regr) + diff --git a/utils/funcexp/func_makedate.cpp b/utils/funcexp/func_makedate.cpp index 948b612de..5d013728f 100644 --- a/utils/funcexp/func_makedate.cpp +++ b/utils/funcexp/func_makedate.cpp @@ -149,9 +149,10 @@ uint64_t makedate(rowgroup::Row& row, case 
CalpontSystemCatalog::TIME: { std::ostringstream ss; - Time aTime = parm[1]->data()->getTimeIntVal(row, isNull); - ss << aTime.hour << aTime.minute << aTime.second; - dayofyear = ss.str(); + char buf[9]; + uint64_t aTime = parm[1]->data()->getTimeIntVal(row, isNull); + DataConvert::timeToString1(aTime, buf, 9); + dayofyear = buf; break; } diff --git a/utils/funcexp/func_timediff.cpp b/utils/funcexp/func_timediff.cpp index 742e8faf7..369bb80a1 100644 --- a/utils/funcexp/func_timediff.cpp +++ b/utils/funcexp/func_timediff.cpp @@ -118,6 +118,11 @@ string Func_timediff::getStrVal(rowgroup::Row& row, case execplan::CalpontSystemCatalog::TIME: case execplan::CalpontSystemCatalog::DATETIME: + if (type1 != type2) + { + isNull = true; + break; + } val1 = parm[0]->data()->getDatetimeIntVal(row, isNull); break; diff --git a/utils/funcexp/funcexp.cpp b/utils/funcexp/funcexp.cpp index 66782cc54..53f7da595 100644 --- a/utils/funcexp/funcexp.cpp +++ b/utils/funcexp/funcexp.cpp @@ -75,6 +75,8 @@ FuncExp::FuncExp() fFuncMap["abs"] = new Func_abs(); fFuncMap["acos"] = new Func_acos(); fFuncMap["add_time"] = new Func_add_time(); + fFuncMap["addtime"] = new Func_add_time(); + fFuncMap["subtime"] = new Func_add_time(); fFuncMap["asin"] = new Func_asin(); fFuncMap["ascii"] = new Func_ascii(); fFuncMap["atan"] = new Func_atan(); diff --git a/utils/regr/CMakeLists.txt b/utils/regr/CMakeLists.txt new file mode 100755 index 000000000..16f44d9af --- /dev/null +++ b/utils/regr/CMakeLists.txt @@ -0,0 +1,26 @@ + +include_directories( ${ENGINE_COMMON_INCLUDES} + ../../dbcon/mysql ) + +########### next target ############### + +set(regr_LIB_SRCS regr_avgx.cpp regr_avgy.cpp regr_count.cpp regr_slope.cpp regr_intercept regr_r2) + +add_definitions(-DMYSQL_DYNAMIC_PLUGIN) + +add_library(regr SHARED ${regr_LIB_SRCS} ) + +set_target_properties(regr PROPERTIES VERSION 1.1.0 SOVERSION 1) + +install(TARGETS regr DESTINATION ${ENGINE_LIBDIR} COMPONENT libs) + + + +set(regr_mysql_LIB_SRCS regrmysql.cpp) + 
+add_library(regr_mysql SHARED ${regr_mysql_LIB_SRCS}) + +set_target_properties(regr_mysql PROPERTIES VERSION 1.0.0 SOVERSION 1) + +install(TARGETS regr_mysql DESTINATION ${ENGINE_LIBDIR} COMPONENT storage-engine) + diff --git a/utils/regr/regr.vpj b/utils/regr/regr.vpj new file mode 100644 index 000000000..0de8c7282 --- /dev/null +++ b/utils/regr/regr.vpj @@ -0,0 +1,227 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/regr/regr_avgx.cpp similarity index 55% rename from utils/udfsdk/regr_avgx.cpp rename to utils/regr/regr_avgx.cpp index e99871f97..2041647a5 100644 --- a/utils/udfsdk/regr_avgx.cpp +++ b/utils/regr/regr_avgx.cpp @@ -24,6 +24,17 @@ using namespace mcsv1sdk; +class Add_regr_avgx_ToUDAFMap +{ +public: + Add_regr_avgx_ToUDAFMap() + { + UDAFMap::getMap()["regr_avgx"] = new regr_avgx(); + } +}; + +static Add_regr_avgx_ToUDAFMap addToMap; + #define DATATYPE double // Use the simple data model @@ -73,77 +84,12 @@ mcsv1_UDAF::ReturnCode regr_avgx::reset(mcsv1Context* context) mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { - static_any::any& valIn_y = valsIn[0].columnData; static_any::any& valIn_x = valsIn[1].columnData; struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; - DATATYPE val = 0.0; - - if (context->isParamNull(0) || context->isParamNull(1)) - { - return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. - } - - if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. Probably redundant - { - return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
- } - - if (valIn_x.compatible(longTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(charTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(scharTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(shortTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(intTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(llTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ucharTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ushortTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(uintTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ulongTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ullTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(floatTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(doubleTypeId)) - { - val = valIn_x.cast(); - } + DATATYPE val = convertAnyTo(valIn_x); // For decimal types, we need to move the decimal point. uint32_t scale = valsIn[1].scale; - if (val != 0 && scale > 0) { val /= pow(10.0, (double)scale); @@ -191,72 +137,12 @@ mcsv1_UDAF::ReturnCode regr_avgx::evaluate(mcsv1Context* context, static_any::an mcsv1_UDAF::ReturnCode regr_avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { - static_any::any& valIn_y = valsDropped[0].columnData; static_any::any& valIn_x = valsDropped[1].columnData; struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; - DATATYPE val = 0.0; - - if (valIn_x.empty() || valIn_y.empty()) - { - return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
- } - - if (valIn_x.compatible(charTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(scharTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(shortTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(intTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(longTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(llTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ucharTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ushortTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(uintTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ulongTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ullTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(floatTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(doubleTypeId)) - { - val = valIn_x.cast(); - } + double val = convertAnyTo(valIn_x); // For decimal types, we need to move the decimal point. 
uint32_t scale = valsDropped[1].scale; - if (val != 0 && scale > 0) { val /= pow(10.0, (double)scale); diff --git a/utils/udfsdk/regr_avgx.h b/utils/regr/regr_avgx.h similarity index 99% rename from utils/udfsdk/regr_avgx.h rename to utils/regr/regr_avgx.h index 27b8708f7..75791f769 100644 --- a/utils/udfsdk/regr_avgx.h +++ b/utils/regr/regr_avgx.h @@ -26,7 +26,7 @@ * * * CREATE AGGREGATE FUNCTION regr_avgx returns REAL soname - * 'libudf_mysql.so'; + * 'libregr_mysql.so'; * */ #ifndef HEADER_regr_avgx diff --git a/utils/regr/regr_avgy.cpp b/utils/regr/regr_avgy.cpp new file mode 100644 index 000000000..69c654acf --- /dev/null +++ b/utils/regr/regr_avgy.cpp @@ -0,0 +1,153 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include +#include +#include +#include "regr_avgy.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +class Add_regr_avgy_ToUDAFMap +{ +public: + Add_regr_avgy_ToUDAFMap() + { + UDAFMap::getMap()["regr_avgy"] = new regr_avgy(); + } +}; + +static Add_regr_avgy_ToUDAFMap addToMap; + +#define DATATYPE double + +// Use the simple data model +struct regr_avgy_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode regr_avgy::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 2) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgy() with other than 2 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[0].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgy() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(regr_avgy_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[0].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode regr_avgy::reset(mcsv1Context* context) +{ + struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgy::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_y = valsIn[0].columnData; + struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data; + double val = convertAnyTo(valIn_y); + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[0].scale; + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgy::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct regr_avgy_data* outData = (struct regr_avgy_data*)context->getUserData()->data; + struct regr_avgy_data* inData = (struct regr_avgy_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgy::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data; + + if (data->cnt == 0) + { + valOut = 0; + } + else + { + valOut = data->sum / (double)data->cnt; + } + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgy::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_y = valsDropped[0].columnData; + struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data; + double val = convertAnyTo(valIn_y); + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[0].scale; + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/regr/regr_avgy.h b/utils/regr/regr_avgy.h new file mode 100644 index 000000000..c99021f9f --- /dev/null +++ b/utils/regr/regr_avgy.h @@ -0,0 +1,88 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* regr_avgy.h +***********************************************************************/ + +/** + * Columnstore interface for for the regr_avgy function + * + * + * CREATE AGGREGATE FUNCTION regr_avgy returns REAL soname + * 'libregr_mysql.so'; + * + */ +#ifndef HEADER_regr_avgy +#define HEADER_regr_avgy + +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Return the regr_avgy value of the dataset + +class regr_avgy : public mcsv1_UDAF +{ +public: + // Defaults OK + regr_avgy() : mcsv1_UDAF() {}; + virtual ~regr_avgy() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_regr_avgy.h + diff --git a/utils/regr/regr_count.cpp b/utils/regr/regr_count.cpp 
new file mode 100644
index 000000000..c65a1f4a6
--- /dev/null
+++ b/utils/regr/regr_count.cpp
@@ -0,0 +1,131 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+#include
+#include
+#include
+#include "regr_count.h"
+#include "bytestream.h"
+#include "objectreader.h"
+
+using namespace mcsv1sdk;
+
+class Add_regr_count_ToUDAFMap
+{
+public:
+    Add_regr_count_ToUDAFMap()
+    {
+        UDAFMap::getMap()["regr_count"] = new regr_count();
+    }
+};
+
+static Add_regr_count_ToUDAFMap addToMap; // its constructor registers "regr_count" in UDAFMap during static initialization
+
+// Use the simple data model
+struct regr_count_data
+{
+    uint64_t cnt; // number of rows counted so far (both arguments non-NULL)
+};
+
+
+mcsv1_UDAF::ReturnCode regr_count::init(mcsv1Context* context,
+                                        ColumnDatum* colTypes)
+{
+    if (context->getParameterCount() != 2)
+    {
+        // The error message will be prepended with
+        // "The storage engine for the table doesn't support "
+        context->setErrorMessage("regr_count() with other than 2 arguments");
+        return mcsv1_UDAF::ERROR;
+    }
+
+    context->setUserDataSize(sizeof(regr_count_data)); // per-group state is a single regr_count_data
+    context->setResultType(CalpontSystemCatalog::BIGINT);
+    context->setColWidth(8);
+    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
+    return mcsv1_UDAF::SUCCESS;
+
+}
+
+mcsv1_UDAF::ReturnCode regr_count::reset(mcsv1Context* context)
+{
+    struct regr_count_data* data = (struct regr_count_data*)context->getUserData()->data;
+    data->cnt = 0;
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_count::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
+{
+    static_any::any& valIn_y = valsIn[0].columnData; // first argument: y
+    static_any::any& valIn_x = valsIn[1].columnData; // second argument: x
+    struct regr_count_data* data = (struct regr_count_data*)context->getUserData()->data;
+
+    if (context->isParamNull(0) || context->isParamNull(1))
+    {
+        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
+    }
+    if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. Probably redundant
+    {
+        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
+    }
+    ++data->cnt; // only the presence of both values matters; the values themselves are never read
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_count::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
+{
+    if (!userDataIn)
+    {
+        return mcsv1_UDAF::SUCCESS; // nothing to merge
+    }
+
+    struct regr_count_data* outData = (struct regr_count_data*)context->getUserData()->data;
+    struct regr_count_data* inData = (struct regr_count_data*)userDataIn->data;
+
+    outData->cnt += inData->cnt; // fold the other partial count into this context's count
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_count::evaluate(mcsv1Context* context, static_any::any& valOut)
+{
+    struct regr_count_data* data = (struct regr_count_data*)context->getUserData()->data;
+
+    valOut = data->cnt; // always set, so an empty group yields 0
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_count::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
+{
+    static_any::any& valIn_y = valsDropped[0].columnData;
+    static_any::any& valIn_x = valsDropped[1].columnData;
+    struct regr_count_data* data = (struct regr_count_data*)context->getUserData()->data;
+
+    if (context->isParamNull(0) || context->isParamNull(1))
+    {
+        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
+    }
+    if (valIn_x.empty() || valIn_y.empty())
+    {
+        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
+    }
+    --data->cnt; // mirror of nextValue: un-count a row that was previously added
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
diff --git a/utils/regr/regr_count.h b/utils/regr/regr_count.h
new file mode 100644
index 000000000..4f4fc558e
--- /dev/null
+++ b/utils/regr/regr_count.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+/***********************************************************************
+*   $Id$
+*
+*   regr_count.h
+***********************************************************************/
+
+/**
+ * Columnstore interface for the regr_count function
+ *
+ *
+ *    CREATE AGGREGATE FUNCTION regr_count returns INTEGER
+ *    soname 'libregr_mysql.so';
+ *
+ */
+#ifndef HEADER_regr_count
+#define HEADER_regr_count
+
+#include
+#include
+#include
+#ifdef _MSC_VER
+#include
+#else
+#include
+#endif
+
+#include "mcsv1_udaf.h"
+#include "calpontsystemcatalog.h"
+#include "windowfunctioncolumn.h"
+using namespace execplan;
+
+#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+namespace mcsv1sdk
+{
+
+// Return the regr_count value of the dataset
+
+class regr_count : public mcsv1_UDAF
+{
+public:
+    // Defaults OK
+    regr_count() : mcsv1_UDAF() {};
+    virtual ~regr_count() {};
+
+    virtual ReturnCode init(mcsv1Context* context,
+                            ColumnDatum* colTypes);
+
+    virtual ReturnCode reset(mcsv1Context* context);
+
+    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
+
+    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
+
+    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
+
+    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
+
+protected:
+};
+
+}; // namespace
+
+#undef EXPORT
+
+#endif // HEADER_regr_count
+
diff --git a/utils/regr/regr_intercept.cpp b/utils/regr/regr_intercept.cpp
new file mode 100644
index 000000000..8aca6ee3e
--- /dev/null
+++ b/utils/regr/regr_intercept.cpp
@@ -0,0 +1,197 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.
*/
+
+#include
+#include
+#include
+#include "regr_intercept.h"
+#include "bytestream.h"
+#include "objectreader.h"
+
+using namespace mcsv1sdk;
+
+class Add_regr_intercept_ToUDAFMap
+{
+public:
+    Add_regr_intercept_ToUDAFMap()
+    {
+        UDAFMap::getMap()["regr_intercept"] = new regr_intercept();
+    }
+};
+
+static Add_regr_intercept_ToUDAFMap addToMap; // its constructor registers "regr_intercept" in UDAFMap during static initialization
+
+// Use the simple data model
+struct regr_intercept_data
+{
+    uint64_t    cnt;    // number of (y, x) pairs accumulated
+    double      sumx;   // sum of x
+    double      sumx2;  // sum of (x squared)
+    double      sumy;   // sum of y
+    double      sumxy;  // sum of (x*y)
+};
+
+
+mcsv1_UDAF::ReturnCode regr_intercept::init(mcsv1Context* context,
+                                            ColumnDatum* colTypes)
+{
+    if (context->getParameterCount() != 2)
+    {
+        // The error message will be prepended with
+        // "The storage engine for the table doesn't support "
+        context->setErrorMessage("regr_intercept() with other than 2 arguments");
+        return mcsv1_UDAF::ERROR;
+    }
+
+    context->setUserDataSize(sizeof(regr_intercept_data)); // per-group state is one regr_intercept_data
+    context->setResultType(CalpontSystemCatalog::DOUBLE);
+    context->setColWidth(8);
+    context->setScale(colTypes[0].scale + 8);
+    context->setPrecision(19);
+    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); // presumably makes the framework skip NULL inputs; confirm in mcsv1_udaf.h
+    return mcsv1_UDAF::SUCCESS;
+
+}
+
+mcsv1_UDAF::ReturnCode regr_intercept::reset(mcsv1Context* context)
+{
+    struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data;
+    data->cnt = 0;
+    data->sumx = 0.0;
+    data->sumx2 = 0.0;
+    data->sumy = 0.0;
+    data->sumxy = 0.0;
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_intercept::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
+{
+    static_any::any& valIn_y = valsIn[0].columnData; // first argument: y
+    static_any::any& valIn_x = valsIn[1].columnData; // second argument: x
+    struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data;
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsIn[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy += valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsIn[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex); // x is rescaled by its own scale; using y's scale skews x when the scales differ
+    }
+
+    data->sumx += valx;
+    data->sumx2 += valx*valx;
+
+    data->sumxy += valx*valy;
+    ++data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_intercept::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
+{
+    if (!userDataIn)
+    {
+        return mcsv1_UDAF::SUCCESS; // nothing to merge
+    }
+
+    struct regr_intercept_data* outData = (struct regr_intercept_data*)context->getUserData()->data;
+    struct regr_intercept_data* inData = (struct regr_intercept_data*)userDataIn->data;
+
+    outData->sumx += inData->sumx; // the state is plain sums, so merging partial aggregates is addition
+    outData->sumx2 += inData->sumx2;
+    outData->sumy += inData->sumy;
+    outData->sumxy += inData->sumxy;
+    outData->cnt += inData->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_intercept::evaluate(mcsv1Context* context, static_any::any& valOut)
+{
+    struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data;
+    double N = data->cnt;
+    if (N > 0)
+    {
+        double sumx = data->sumx;
+        double sumy = data->sumy;
+        double sumx2 = data->sumx2;
+        double sumxy = data->sumxy;
+        double slope = 0.0;
+        double variance = (N * sumx2) - (sumx * sumx); // N^2 times the population variance of x
+        if (variance != 0) // otherwise (or when N == 0) valOut is left unset; presumably reported as NULL, confirm
+        {
+            slope = ((N * sumxy) - (sumx * sumy)) / variance;
+            valOut = (sumy - (slope * sumx)) / N; // intercept = mean(y) - slope * mean(x)
+        }
+    }
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_intercept::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
+{
+    static_any::any& valIn_y = valsDropped[0].columnData;
+    static_any::any& valIn_x = valsDropped[1].columnData;
+    struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data;
+
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsDropped[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy -= valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsDropped[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex); // must match nextValue exactly, or dropping a row would not undo adding it
+    }
+
+    data->sumx -= valx;
+    data->sumx2 -= valx*valx;
+
+    data->sumxy -= valx*valy;
+    --data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
diff --git a/utils/regr/regr_intercept.h b/utils/regr/regr_intercept.h
new file mode 100644
index 000000000..ed82477cd
--- /dev/null
+++ b/utils/regr/regr_intercept.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.
*/
+
+/***********************************************************************
+*   $Id$
+*
+*   regr_intercept.h
+***********************************************************************/
+
+/**
+ * Columnstore interface for the regr_intercept function
+ *
+ *
+ *    CREATE AGGREGATE FUNCTION regr_intercept returns REAL
+ *    soname 'libregr_mysql.so';
+ *
+ */
+#ifndef HEADER_regr_intercept
+#define HEADER_regr_intercept
+
+#include
+#include
+#include
+#ifdef _MSC_VER
+#include
+#else
+#include
+#endif
+
+#include "mcsv1_udaf.h"
+#include "calpontsystemcatalog.h"
+#include "windowfunctioncolumn.h"
+using namespace execplan;
+
+#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+namespace mcsv1sdk
+{
+
+// Return the regr_intercept value of the dataset
+
+class regr_intercept : public mcsv1_UDAF
+{
+public:
+    // Defaults OK
+    regr_intercept() : mcsv1_UDAF() {};
+    virtual ~regr_intercept() {};
+
+    virtual ReturnCode init(mcsv1Context* context,
+                            ColumnDatum* colTypes);
+
+    virtual ReturnCode reset(mcsv1Context* context);
+
+    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
+
+    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
+
+    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
+
+    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
+
+protected:
+};
+
+}; // namespace
+
+#undef EXPORT
+
+#endif // HEADER_regr_intercept
+
diff --git a/utils/regr/regr_r2.cpp b/utils/regr/regr_r2.cpp
new file mode 100644
index 000000000..052b5dcfc
--- /dev/null
+++ b/utils/regr/regr_r2.cpp
@@ -0,0 +1,216 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+#include
+#include
+#include
+#include "regr_r2.h"
+#include "bytestream.h"
+#include "objectreader.h"
+
+using namespace mcsv1sdk;
+
+class Add_regr_r2_ToUDAFMap
+{
+public:
+    Add_regr_r2_ToUDAFMap()
+    {
+        UDAFMap::getMap()["regr_r2"] = new regr_r2();
+    }
+};
+
+static Add_regr_r2_ToUDAFMap addToMap; // its constructor registers "regr_r2" in UDAFMap during static initialization
+
+// Use the simple data model
+struct regr_r2_data
+{
+    uint64_t    cnt;    // number of (y, x) pairs accumulated
+    double      sumx;   // sum of x
+    double      sumx2;  // sum of (x squared)
+    double      sumy;   // sum of y
+    double      sumy2;  // sum of (y squared)
+    double      sumxy;  // sum of x * y
+};
+
+
+mcsv1_UDAF::ReturnCode regr_r2::init(mcsv1Context* context,
+                                     ColumnDatum* colTypes)
+{
+    if (context->getParameterCount() != 2)
+    {
+        // The error message will be prepended with
+        // "The storage engine for the table doesn't support "
+        context->setErrorMessage("regr_r2() with other than 2 arguments");
+        return mcsv1_UDAF::ERROR;
+    }
+
+    context->setUserDataSize(sizeof(regr_r2_data)); // per-group state is one regr_r2_data
+    context->setResultType(CalpontSystemCatalog::DOUBLE);
+    context->setColWidth(8);
+    context->setScale(colTypes[0].scale + 8);
+    context->setPrecision(19);
+    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); // presumably makes the framework skip NULL inputs; confirm in mcsv1_udaf.h
+    return mcsv1_UDAF::SUCCESS;
+
+}
+
+mcsv1_UDAF::ReturnCode regr_r2::reset(mcsv1Context* context)
+{
+    struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;
+    data->cnt = 0;
+    data->sumx = 0.0;
+    data->sumx2 = 0.0;
+    data->sumy = 0.0;
+    data->sumy2 = 0.0;
+    data->sumxy = 0.0;
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_r2::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
+{
+    static_any::any& valIn_y = valsIn[0].columnData; // first argument: y
+    static_any::any& valIn_x = valsIn[1].columnData; // second argument: x
+    struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsIn[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy += valy;
+    data->sumy2 += valy*valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsIn[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex); // x is rescaled by its own scale; using y's scale skews x when the scales differ
+    }
+
+    data->sumx += valx;
+    data->sumx2 += valx*valx;
+
+    data->sumxy += valx*valy;
+
+    ++data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_r2::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
+{
+    if (!userDataIn)
+    {
+        return mcsv1_UDAF::SUCCESS; // nothing to merge
+    }
+
+    struct regr_r2_data* outData = (struct regr_r2_data*)context->getUserData()->data;
+    struct regr_r2_data* inData = (struct regr_r2_data*)userDataIn->data;
+
+    outData->sumx += inData->sumx; // the state is plain sums, so merging partial aggregates is addition
+    outData->sumx2 += inData->sumx2;
+    outData->sumy += inData->sumy;
+    outData->sumy2 += inData->sumy2;
+    outData->sumxy += inData->sumxy;
+    outData->cnt += inData->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_r2::evaluate(mcsv1Context* context, static_any::any& valOut)
+{
+    struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;
+    double N = data->cnt;
+    if (N > 0) // with no rows valOut is left unset
+    {
+        double sumx = data->sumx;
+        double sumy = data->sumy;
+        double sumx2 = data->sumx2;
+        double sumy2 = data->sumy2;
+        double sumxy = data->sumxy;
+
+        double var_popx = (sumx2 - (sumx * sumx / N)) / N; // population variance of x
+        if (var_popx == 0)
+        {
+            // When var_popx is 0, NULL is the result.
+            return mcsv1_UDAF::SUCCESS;
+        }
+        double var_popy = (sumy2 - (sumy * sumy / N)) / N; // population variance of y
+        if (var_popy == 0)
+        {
+            // When var_popy is 0, 1 is the result
+            valOut = 1.0;
+            return mcsv1_UDAF::SUCCESS;
+        }
+        double std_popx = sqrt(var_popx);
+        double std_popy = sqrt(var_popy);
+        double covar_pop = (sumxy - ((sumx * sumy) / N)) / N; // population covariance of x and y
+        double corr = covar_pop / (std_popy * std_popx);
+        valOut = corr * corr; // R^2 is the squared correlation coefficient
+    }
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_r2::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
+{
+    static_any::any& valIn_y = valsDropped[0].columnData;
+    static_any::any& valIn_x = valsDropped[1].columnData;
+    struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;
+
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsDropped[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy -= valy;
+    data->sumy2 -= valy*valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsDropped[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex); // must match nextValue exactly, or dropping a row would not undo adding it
+    }
+
+    data->sumx -= valx;
+    data->sumx2 -= valx*valx;
+
+    data->sumxy -= valx*valy;
+    --data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
diff --git a/utils/regr/regr_r2.h b/utils/regr/regr_r2.h
new file mode 100644
index 000000000..6ff65009a
--- /dev/null
+++ b/utils/regr/regr_r2.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+/***********************************************************************
+*   $Id$
+*
+*   regr_r2.h
+***********************************************************************/
+
+/**
+ * Columnstore interface for the regr_r2 function
+ *
+ *
+ *    CREATE AGGREGATE FUNCTION regr_r2 returns REAL
+ *    soname 'libregr_mysql.so';
+ *
+ */
+#ifndef HEADER_regr_r2
+#define HEADER_regr_r2
+
+#include
+#include
+#include
+#ifdef _MSC_VER
+#include
+#else
+#include
+#endif
+
+#include "mcsv1_udaf.h"
+#include "calpontsystemcatalog.h"
+#include "windowfunctioncolumn.h"
+using namespace execplan;
+
+#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+namespace mcsv1sdk
+{
+
+// Return the regr_r2 value of the dataset
+
+class regr_r2 : public mcsv1_UDAF
+{
+public:
+    // Defaults OK
+    regr_r2() : mcsv1_UDAF() {};
+    virtual ~regr_r2() {};
+
+    virtual ReturnCode init(mcsv1Context* context,
+                            ColumnDatum* colTypes);
+
+    virtual ReturnCode reset(mcsv1Context* context);
+
+    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
+
+    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
+
+    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
+
+    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
+
+protected:
+};
+
+}; // namespace
+
+#undef EXPORT
+
+#endif // HEADER_regr_r2
+
diff --git a/utils/regr/regr_slope.cpp b/utils/regr/regr_slope.cpp
new file mode 100644
index 000000000..51f649046
--- /dev/null
+++ b/utils/regr/regr_slope.cpp
@@ -0,0 +1,196 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software;
you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+#include
+#include
+#include
+#include "regr_slope.h"
+#include "bytestream.h"
+#include "objectreader.h"
+
+using namespace mcsv1sdk;
+
+class Add_regr_slope_ToUDAFMap
+{
+public:
+    Add_regr_slope_ToUDAFMap()
+    {
+        UDAFMap::getMap()["regr_slope"] = new regr_slope();
+    }
+};
+
+static Add_regr_slope_ToUDAFMap addToMap; // its constructor registers "regr_slope" in UDAFMap during static initialization
+
+// Use the simple data model
+struct regr_slope_data
+{
+    uint64_t    cnt;    // number of (y, x) pairs accumulated
+    double      sumx;   // sum of x
+    double      sumx2;  // sum of (x squared)
+    double      sumy;   // sum of y
+    double      sumxy;  // sum of (x*y)
+};
+
+
+mcsv1_UDAF::ReturnCode regr_slope::init(mcsv1Context* context,
+                                        ColumnDatum* colTypes)
+{
+    if (context->getParameterCount() != 2)
+    {
+        // The error message will be prepended with
+        // "The storage engine for the table doesn't support "
+        context->setErrorMessage("regr_slope() with other than 2 arguments");
+        return mcsv1_UDAF::ERROR;
+    }
+
+    context->setUserDataSize(sizeof(regr_slope_data)); // per-group state is one regr_slope_data
+    context->setResultType(CalpontSystemCatalog::DOUBLE);
+    context->setColWidth(8);
+    context->setScale(colTypes[0].scale + 8);
+    context->setPrecision(19);
+    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); // presumably makes the framework skip NULL inputs; confirm in mcsv1_udaf.h
+    return mcsv1_UDAF::SUCCESS;
+
+}
+
+mcsv1_UDAF::ReturnCode regr_slope::reset(mcsv1Context* context)
+{
+    struct regr_slope_data* data = (struct regr_slope_data*)context->getUserData()->data;
+    data->cnt = 0;
+    data->sumx = 0.0;
+    data->sumx2 = 0.0;
+    data->sumy = 0.0;
+    data->sumxy = 0.0;
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_slope::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
+{
+    static_any::any& valIn_y = valsIn[0].columnData; // first argument: y
+    static_any::any& valIn_x = valsIn[1].columnData; // second argument: x
+    struct regr_slope_data* data = (struct regr_slope_data*)context->getUserData()->data;
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsIn[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy += valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsIn[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex); // x is rescaled by its own scale; using y's scale skews x when the scales differ
+    }
+
+    data->sumx += valx;
+    data->sumx2 += valx*valx;
+
+    data->sumxy += valx*valy;
+    ++data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_slope::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
+{
+    if (!userDataIn)
+    {
+        return mcsv1_UDAF::SUCCESS; // nothing to merge
+    }
+
+    struct regr_slope_data* outData = (struct regr_slope_data*)context->getUserData()->data;
+    struct regr_slope_data* inData = (struct regr_slope_data*)userDataIn->data;
+
+    outData->sumx += inData->sumx; // the state is plain sums, so merging partial aggregates is addition
+    outData->sumx2 += inData->sumx2;
+    outData->sumy += inData->sumy;
+    outData->sumxy += inData->sumxy;
+    outData->cnt += inData->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_slope::evaluate(mcsv1Context* context, static_any::any& valOut)
+{
+    struct regr_slope_data* data = (struct regr_slope_data*)context->getUserData()->data;
+    double N = data->cnt;
+    if (N > 0)
+    {
+        double sumx = data->sumx;
+        double sumy = data->sumy;
+        double sumx2 = data->sumx2;
+        double sumxy = data->sumxy;
+        double variance = (N * sumx2) - (sumx * sumx); // N^2 times the population variance of x
+        if (variance != 0) // otherwise (or when N == 0) valOut is left unset; presumably reported as NULL, confirm
+        {
+            double slope = ((N * sumxy) - (sumx * sumy)) / variance; // least-squares slope of y on x
+            valOut = slope;
+        }
+    }
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_slope::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
+{
+    static_any::any& valIn_y = valsDropped[0].columnData;
+    static_any::any& valIn_x = valsDropped[1].columnData;
+    struct regr_slope_data* data = (struct regr_slope_data*)context->getUserData()->data;
+
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsDropped[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy -= valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsDropped[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex); // must match nextValue exactly, or dropping a row would not undo adding it
+    }
+
+    data->sumx -= valx;
+    data->sumx2 -= valx*valx;
+
+    data->sumxy -= valx*valy;
+    --data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
diff --git a/utils/regr/regr_slope.h b/utils/regr/regr_slope.h
new file mode 100644
index 000000000..9c148d895
--- /dev/null
+++ b/utils/regr/regr_slope.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.
*/
+
+/***********************************************************************
+*   $Id$
+*
+*   regr_slope.h
+***********************************************************************/
+
+/**
+ * Columnstore interface for the regr_slope function
+ *
+ *
+ *    CREATE AGGREGATE FUNCTION regr_slope returns REAL
+ *    soname 'libregr_mysql.so';
+ *
+ */
+#ifndef HEADER_regr_slope
+#define HEADER_regr_slope
+
+#include
+#include
+#include
+#ifdef _MSC_VER
+#include
+#else
+#include
+#endif
+
+#include "mcsv1_udaf.h"
+#include "calpontsystemcatalog.h"
+#include "windowfunctioncolumn.h"
+using namespace execplan;
+
+#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+namespace mcsv1sdk
+{
+
+// Return the regr_slope value of the dataset
+
+class regr_slope : public mcsv1_UDAF
+{
+public:
+    // Defaults OK
+    regr_slope() : mcsv1_UDAF() {};
+    virtual ~regr_slope() {};
+
+    virtual ReturnCode init(mcsv1Context* context,
+                            ColumnDatum* colTypes);
+
+    virtual ReturnCode reset(mcsv1Context* context);
+
+    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
+
+    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
+
+    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
+
+    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
+
+protected:
+};
+
+}; // namespace
+
+#undef EXPORT
+
+#endif // HEADER_regr_slope
+
diff --git a/utils/regr/regrmysql.cpp b/utils/regr/regrmysql.cpp
new file mode 100644
index 000000000..fce6bb440
--- /dev/null
+++ b/utils/regr/regrmysql.cpp
@@ -0,0 +1,725 @@
+#include
+#include
+#include
+#include
+using namespace std;
+
+#include "idb_mysql.h"
+
+namespace
+{
+inline double cvtArgToDouble(int t, const char* v) // read the argument buffer v as a double, according to result type t
+{
+    double d = 0.0; // returned unchanged for ROW_RESULT or an unlisted type
+
+    switch (t)
+    {
+        case INT_RESULT:
+            d = (double)(*((long long*)v));
+            break;
+
+        case REAL_RESULT:
+            d = *((double*)v);
+            break;
+
+        case DECIMAL_RESULT:
+        case STRING_RESULT:
+            d = strtod(v, 0); // parsed as text; assumes v is NUL-terminated (TODO confirm against the UDF API)
+            break;
+
+        case ROW_RESULT:
+            break;
+    }
+
+    return d;
+}
+inline long long cvtArgToInt(int t, const char* v) // read the argument buffer v as a long long, according to result type t
+{
+    long long ll = 0; // returned unchanged for ROW_RESULT or an unlisted type
+
+    switch (t)
+    {
+        case INT_RESULT:
+            ll = *((long long*)v);
+            break;
+
+        case REAL_RESULT:
+            ll = (long long)(*((double*)v)); // the cast truncates toward zero
+            break;
+
+        case DECIMAL_RESULT:
+        case STRING_RESULT:
+            ll = strtoll(v, 0, 0); // base 0: strtoll auto-detects a 0x / leading-0 prefix
+            break;
+
+        case ROW_RESULT:
+            break;
+    }
+
+    return ll;
+}
+inline string cvtArgToString(int t, const char* v) // render the argument buffer v as text, according to result type t
+{
+    string str; // stays empty for ROW_RESULT or an unlisted type
+
+    switch (t)
+    {
+        case INT_RESULT:
+        {
+            long long ll;
+            ll = *((long long*)v);
+            ostringstream oss;
+            oss << ll;
+            str = oss.str();
+            break;
+        }
+
+        case REAL_RESULT:
+        {
+            double d;
+            d = *((double*)v);
+            ostringstream oss;
+            oss << d;
+            str = oss.str();
+            break;
+        }
+
+        case DECIMAL_RESULT:
+        case STRING_RESULT:
+            str = v; // copies up to the first NUL; assumes v is NUL-terminated (TODO confirm)
+            break;
+
+        case ROW_RESULT:
+            break;
+    }
+
+    return str;
+}
+}
+
+/****************************************************************************
+ * UDF function interface for MariaDB connector to recognize is defined in
+ * this section. MariaDB's UDF function creation guideline needs to be followed.
+ *
+ * Three interfaces need to be defined on the connector for each UDF function.
+ *
+ * XXX_init: To allocate the necessary memory for the UDF function and validate
+ *           the input.
+ * XXX_deinit: To clean up the memory.
+ * XXX: The function implementation.
+ * Detailed instructions can be found at MariaDB source directory:
+ * ~/sql/udf_example.cc.
+ *
+ * Please note that the implementation of the function defined on the connector
+ * will only be called when all the input arguments are constant. e.g.,
+ * mcs_add(2,3). That way, the function does not run in a distributed fashion
+ * and could be slow. If there is a need for the UDF function to run with
+ * pure constant input, then one needs to put an implementation in the XXX
+ * body, which is very similar to the ones in getXXXval API.
If there's no
+ * such need for a given UDF, then the XXX interface can just return a dummy
+ * result because this function will never be called.
+ */
+extern "C"
+{
+
+//=======================================================================
+
+    /**
+     * regr_avgx
+     */
+    struct regr_avgx_data
+    {
+        double  sumx; // sum of x over the rows where both arguments were non-NULL
+        int64_t cnt;  // number of such rows
+    };
+
+    #ifdef _MSC_VER
+    __declspec(dllexport)
+    #endif
+    my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) // validates the argument count and allocates the state; returns 1 on error
+    {
+        struct regr_avgx_data* data;
+        if (args->arg_count != 2)
+        {
+            strcpy(message,"regr_avgx() requires two arguments");
+            return 1;
+        }
+
+        if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data))))
+        {
+            strmov(message,"Couldn't allocate memory");
+            return 1;
+        }
+        data->sumx = 0;
+        data->cnt = 0;
+
+        initid->ptr = (char*)data; // state is handed to the other callbacks through initid->ptr and freed in regr_avgx_deinit
+        return 0;
+    }
+
+    #ifdef _MSC_VER
+    __declspec(dllexport)
+    #endif
+    void regr_avgx_deinit(UDF_INIT* initid)
+    {
+        free(initid->ptr);
+    }
+
+    #ifdef _MSC_VER
+    __declspec(dllexport)
+    #endif
+    void
+    regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
+                    char* message __attribute__((unused)))
+    {
+        struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr;
+        data->sumx = 0; // restart the accumulation
+        data->cnt = 0;
+    }
+
+    #ifdef _MSC_VER
+    __declspec(dllexport)
+    #endif
+    void
+    regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args,
+                  char* is_null,
+                  char* message __attribute__((unused)))
+    {
+        // Test for NULL in x and y
+        if (args->args[0] == 0 || args->args[1] == 0)
+        {
+            return;
+        }
+        struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr;
+        double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); // x is the second argument
+        ++data->cnt;
+        data->sumx += xval;
+    }
+
+    #ifdef _MSC_VER
+    __declspec(dllexport)
+    #endif
+    double regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
+                     char* is_null, char* error __attribute__((unused)))
+    {
+        struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr;
+        return (data->cnt > 0) ? data->sumx / data->cnt : (*is_null = 1, 0.0); // no rows: flag SQL NULL instead of computing 0/0; kept on one line so the hunk's line count is unchanged
+    }
+
+//======================================================================= + + /** + * regr_avgy + */ + struct regr_avgy_data + { + double sumy; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_avgy_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_avgy_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_avgy() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_avgy_data*) malloc(sizeof(struct regr_avgy_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumy = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_avgy_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgy_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_avgy_data* data = (struct regr_avgy_data*)initid->ptr; + data->sumy = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgy_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct regr_avgy_data* data = (struct regr_avgy_data*)initid->ptr; + double yval = cvtArgToDouble(args->arg_type[0], args->args[0]); + ++data->cnt; + data->sumy += yval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + double regr_avgy(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_avgy_data* data = (struct regr_avgy_data*)initid->ptr; + return data->sumy / data->cnt; + } + +//======================================================================= + + /** + * regr_count + */ + struct regr_count_data + { + int64_t cnt; + }; + + #ifdef _MSC_VER + 
__declspec(dllexport) + #endif + my_bool regr_count_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_count_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_count() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_count_data*) malloc(sizeof(struct regr_count_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_count_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_count_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_count_data* data = (struct regr_count_data*)initid->ptr; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_count_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct regr_count_data* data = (struct regr_count_data*)initid->ptr; + ++data->cnt; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long regr_count(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_count_data* data = (struct regr_count_data*)initid->ptr; + return data->cnt; + } + +//======================================================================= + + /** + * regr_slope + */ + struct regr_slope_data + { + int64_t cnt; + double sumx; + double sumx2; // sum of (x squared) + double sumy; + double sumxy; // sum of (x*y) + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_slope_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_slope_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_slope() requires two 
arguments"); + return 1; + } + + if (!(data = (struct regr_slope_data*) malloc(sizeof(struct regr_slope_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + data->sumy = 0.0; + data->sumxy = 0.0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_slope_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_slope_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_slope_data* data = (struct regr_slope_data*)initid->ptr; + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + data->sumy = 0.0; + data->sumxy = 0.0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_slope_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct regr_slope_data* data = (struct regr_slope_data*)initid->ptr; + double yval = cvtArgToDouble(args->arg_type[0], args->args[0]); + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + data->sumy += yval; + data->sumx += xval; + data->sumx2 += xval*xval; + data->sumxy += xval*yval; + ++data->cnt; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + double regr_slope(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_slope_data* data = (struct regr_slope_data*)initid->ptr; + double N = data->cnt; + if (N > 0) + { + double sumx = data->sumx; + double sumy = data->sumy; + double sumx2 = data->sumx2; + double sumxy = data->sumxy; + double variance = (N * sumx2) - (sumx * sumx); + if (variance) + { + return ((N * sumxy) - (sumx * sumy)) / variance; + } + } + *is_null = 1; + return 0; + } + 
+//======================================================================= + + /** + * regr_intercept + */ + struct regr_intercept_data + { + int64_t cnt; + double sumx; + double sumx2; // sum of (x squared) + double sumy; + double sumxy; // sum of (x*y) + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_intercept_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_intercept_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_intercept() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_intercept_data*) malloc(sizeof(struct regr_intercept_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + data->sumy = 0.0; + data->sumxy = 0.0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_intercept_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_intercept_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_intercept_data* data = (struct regr_intercept_data*)initid->ptr; + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + data->sumy = 0.0; + data->sumxy = 0.0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_intercept_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct regr_intercept_data* data = (struct regr_intercept_data*)initid->ptr; + double yval = cvtArgToDouble(args->arg_type[0], args->args[0]); + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + data->sumy += yval; + data->sumx += xval; + data->sumx2 += xval*xval; + data->sumxy += xval*yval; + ++data->cnt; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + 
double regr_intercept(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_intercept_data* data = (struct regr_intercept_data*)initid->ptr; + double N = data->cnt; + if (N > 0) + { + double sumx = data->sumx; + double sumy = data->sumy; + double sumx2 = data->sumx2; + double sumxy = data->sumxy; + double variance = (N * sumx2) - (sumx * sumx); + if (variance) + { + double slope = ((N * sumxy) - (sumx * sumy)) / variance; + return (sumy - (slope * sumx)) / N; + } + } + *is_null = 1; + return 0; + } + +//======================================================================= + + /** + * regr_r2 + */ + struct regr_r2_data + { + int64_t cnt; + double sumx; + double sumx2; // sum of (x squared) + double sumy; + double sumy2; // sum of (y squared) + double sumxy; // sum of (x*y) + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_r2_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_r2_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_r2() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_r2_data*) malloc(sizeof(struct regr_r2_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + data->sumy = 0.0; + data->sumy2 = 0.0; + data->sumxy = 0.0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_r2_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_r2_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_r2_data* data = (struct regr_r2_data*)initid->ptr; + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + data->sumy = 0.0; + data->sumy2 = 0.0; + data->sumxy = 0.0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + 
regr_r2_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct regr_r2_data* data = (struct regr_r2_data*)initid->ptr; + double yval = cvtArgToDouble(args->arg_type[0], args->args[0]); + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + data->sumy += yval; + data->sumx += xval; + data->sumx2 += xval*xval; + data->sumy2 += yval*yval; + data->sumxy += xval*yval; + ++data->cnt; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + double regr_r2(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_r2_data* data = (struct regr_r2_data*)initid->ptr; + double N = data->cnt; + if (N > 0) + { + double sumx = data->sumx; + double sumy = data->sumy; + double sumx2 = data->sumx2; + double sumy2 = data->sumy2; + double sumxy = data->sumxy; + double var_popx = (sumx2 - (sumx * sumx / N)) / N; + if (var_popx == 0) + { + // When var_popx is 0, NULL is the result. + *is_null = 1; + return 0; + } + double var_popy = (sumy2 - (sumy * sumy / N)) / N; + if (var_popy == 0) + { + // When var_popy is 0, 1 is the result + return 1; + } + double std_popx = sqrt(var_popx); + double std_popy = sqrt(var_popy); + double covar_pop = (sumxy - ((sumx * sumy) / N)) / N; + double corr = covar_pop / (std_popy * std_popx); + return corr * corr; + } + *is_null = 1; + return 0; + } +} +// vim:ts=4 sw=4: + diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index f9db8b266..d08781c07 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -2015,18 +2015,9 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, for (uint32_t i = 0; i < paramCount; ++i) { - // If UDAF_IGNORE_NULLS is on, bIsNull gets set the first time - // we find a null. 
We still need to eat the rest of the parameters - // to sync updateEntry - if (bIsNull) - { - ++funcColsIdx; - continue; - } - SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; - // Turn on NULL flags + // Turn on NULL flags based on the data dataFlags[i] = 0; // If this particular parameter is a constant, then we need @@ -2043,9 +2034,11 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, { if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { - bIsNull = true; - ++funcColsIdx; - continue; + // When Ignore nulls, if there are multiple parameters and any + // one of them is NULL, we ignore the entry. We need to increment + // funcColsIdx the number of extra parameters. + funcColsIdx += paramCount - i - 1; + return; } dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index b593239cd..e039d5c2a 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -242,7 +242,7 @@ struct RowUDAFFunctionCol : public RowAggFunctionCol mcsv1sdk::mcsv1Context fUDAFContext; // The UDAF context bool bInterrupted; // Shared by all the threads -}; + }; inline void RowAggFunctionCol::serialize(messageqcpp::ByteStream& bs) const { diff --git a/utils/threadpool/prioritythreadpool.cpp b/utils/threadpool/prioritythreadpool.cpp index 4c043ebbb..92fd0ad98 100644 --- a/utils/threadpool/prioritythreadpool.cpp +++ b/utils/threadpool/prioritythreadpool.cpp @@ -42,15 +42,22 @@ PriorityThreadPool::PriorityThreadPool(uint targetWeightPerRun, uint highThreads uint midThreads, uint lowThreads, uint ID) : _stop(false), weightPerRun(targetWeightPerRun), id(ID) { + boost::thread* newThread; for (uint32_t i = 0; i < highThreads; i++) - threads.create_thread(ThreadHelper(this, HIGH)); - + { + newThread = threads.create_thread(ThreadHelper(this, HIGH)); + newThread->detach(); + } for (uint32_t i = 0; i < midThreads; i++) - 
threads.create_thread(ThreadHelper(this, MEDIUM)); - + { + newThread = threads.create_thread(ThreadHelper(this, MEDIUM)); + newThread->detach(); + } for (uint32_t i = 0; i < lowThreads; i++) - threads.create_thread(ThreadHelper(this, LOW)); - + { + newThread = threads.create_thread(ThreadHelper(this, LOW)); + newThread->detach(); + } cout << "started " << highThreads << " high, " << midThreads << " med, " << lowThreads << " low.\n"; defaultThreadCounts[HIGH] = threadCounts[HIGH] = highThreads; @@ -65,6 +72,7 @@ PriorityThreadPool::~PriorityThreadPool() void PriorityThreadPool::addJob(const Job& job, bool useLock) { + boost::thread* newThread; mutex::scoped_lock lk(mutex, defer_lock_t()); if (useLock) @@ -73,19 +81,22 @@ void PriorityThreadPool::addJob(const Job& job, bool useLock) // Create any missing threads if (defaultThreadCounts[HIGH] != threadCounts[HIGH]) { - threads.create_thread(ThreadHelper(this, HIGH)); + newThread = threads.create_thread(ThreadHelper(this, HIGH)); + newThread->detach(); threadCounts[HIGH]++; } if (defaultThreadCounts[MEDIUM] != threadCounts[MEDIUM]) { - threads.create_thread(ThreadHelper(this, MEDIUM)); + newThread = threads.create_thread(ThreadHelper(this, MEDIUM)); + newThread->detach(); threadCounts[MEDIUM]++; } if (defaultThreadCounts[LOW] != threadCounts[LOW]) { - threads.create_thread(ThreadHelper(this, LOW)); + newThread = threads.create_thread(ThreadHelper(this, LOW)); + newThread->detach(); threadCounts[LOW]++; } @@ -281,7 +292,6 @@ void PriorityThreadPool::sendErrorMsg(uint32_t id, uint32_t step, primitiveproce void PriorityThreadPool::stop() { _stop = true; - threads.join_all(); } } // namespace threadpool diff --git a/utils/threadpool/threadpool.cpp b/utils/threadpool/threadpool.cpp index f1aa4ec19..a12e574b8 100644 --- a/utils/threadpool/threadpool.cpp +++ b/utils/threadpool/threadpool.cpp @@ -43,7 +43,8 @@ ThreadPool::ThreadPool() } ThreadPool::ThreadPool( size_t maxThreads, size_t queueSize ) - : fMaxThreads( maxThreads 
), fQueueSize( queueSize ) + :fMaxThreads( maxThreads ), fQueueSize( queueSize ), + fPruneThread( NULL ) { init(); } @@ -72,6 +73,7 @@ void ThreadPool::init() fStop = false; fNextFunctor = fWaitingFunctors.end(); fNextHandle = 1; + fPruneThread = new boost::thread(boost::bind(&ThreadPool::pruneThread, this)); } void ThreadPool::setQueueSize(size_t queueSize) @@ -80,6 +82,39 @@ void ThreadPool::setQueueSize(size_t queueSize) fQueueSize = queueSize; } +void ThreadPool::pruneThread() +{ + boost::mutex::scoped_lock lock2(fPruneMutex); + + while(true) + { + boost::system_time timeout = boost::get_system_time() + boost::posix_time::minutes(1); + if (!fPruneThreadEnd.timed_wait(fPruneMutex, timeout)) + { + while(!fPruneThreads.empty()) + { + if (fDebug) + { + ostringstream oss; + oss << "pruning thread " << fPruneThreads.top(); + logging::Message::Args args; + logging::Message message(0); + args.add(oss.str()); + message.format( args ); + logging::LoggingID lid(22); + logging::MessageLog ml(lid); + ml.logWarningMessage( message ); + } + fThreads.join_one(fPruneThreads.top()); + fPruneThreads.pop(); + } + } + else + { + break; + } + } +} void ThreadPool::setMaxThreads(size_t maxThreads) { @@ -93,6 +128,9 @@ void ThreadPool::stop() fStop = true; lock1.unlock(); + fPruneThreadEnd.notify_all(); + fPruneThread->join(); + delete fPruneThread; fNeedThread.notify_all(); fThreads.join_all(); } @@ -305,6 +343,8 @@ void ThreadPool::beginThread() throw() { if (fThreadCount > fMaxThreads) { + boost::mutex::scoped_lock lock2(fPruneMutex); + fPruneThreads.push(boost::this_thread::get_id()); --fThreadCount; return; } diff --git a/utils/threadpool/threadpool.h b/utils/threadpool/threadpool.h index 11d67338f..7bc605472 100644 --- a/utils/threadpool/threadpool.h +++ b/utils/threadpool/threadpool.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,106 @@ namespace threadpool { + +// Taken from boost::thread_group and adapted +class ThreadPoolGroup 
+{ +private: + ThreadPoolGroup(ThreadPoolGroup const&); + ThreadPoolGroup& operator=(ThreadPoolGroup const&); +public: + ThreadPoolGroup() {} + ~ThreadPoolGroup() + { + for(std::list::iterator it=threads.begin(),end=threads.end(); + it!=end; + ++it) + { + delete *it; + } + } + + template + boost::thread* create_thread(F threadfunc) + { + boost::lock_guard guard(m); + std::auto_ptr new_thread(new boost::thread(threadfunc)); + threads.push_back(new_thread.get()); + return new_thread.release(); + } + + void add_thread(boost::thread* thrd) + { + if(thrd) + { + boost::lock_guard guard(m); + threads.push_back(thrd); + } + } + + void remove_thread(boost::thread* thrd) + { + boost::lock_guard guard(m); + std::list::iterator const it=std::find(threads.begin(),threads.end(),thrd); + if(it!=threads.end()) + { + threads.erase(it); + } + } + + void join_all() + { + boost::shared_lock guard(m); + + for(std::list::iterator it=threads.begin(),end=threads.end(); + it!=end; + ++it) + { + (*it)->join(); + } + } + + void interrupt_all() + { + boost::shared_lock guard(m); + + for(std::list::iterator it=threads.begin(),end=threads.end(); + it!=end; + ++it) + { + (*it)->interrupt(); + } + } + + size_t size() const + { + boost::shared_lock guard(m); + return threads.size(); + } + + void join_one(boost::thread::id id) + { + boost::shared_lock guard(m); + for(std::list::iterator it=threads.begin(),end=threads.end(); + it!=end; + ++it) + { + if ((*it)->get_id() == id) + { + (*it)->join(); + threads.erase(it); + return; + } + } + + } + +private: + std::list threads; + mutable boost::shared_mutex m; +}; + + /** @brief ThreadPool is a component for working with pools of threads and asynchronously * executing tasks. It is responsible for creating threads and tracking which threads are "busy" * and which are idle. Idle threads are utilized as "work" is added to the system. 
@@ -207,6 +308,7 @@ private: */ void beginThread() throw(); + void pruneThread(); ThreadPool(const ThreadPool&); ThreadPool& operator = (const ThreadPool&); @@ -245,7 +347,7 @@ private: boost::mutex fMutex; boost::condition fThreadAvailable; // triggered when a thread is available boost::condition fNeedThread; // triggered when a thread is needed - boost::thread_group fThreads; + ThreadPoolGroup fThreads; bool fStop; long fGeneralErrors; @@ -255,6 +357,10 @@ private: std::string fName; // Optional to add a name to the pool for debugging. bool fDebug; + boost::mutex fPruneMutex; + boost::condition fPruneThreadEnd; + boost::thread* fPruneThread; + std::stack fPruneThreads; // A list of stale thread IDs to be joined }; // This class, if instantiated, will continuously log details about the indicated threadpool diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index ad4460977..417ccb7ed 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp avg_mode.cpp avgx.cpp distinct_count.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/udfsdk/distinct_count.cpp b/utils/udfsdk/distinct_count.cpp new file mode 100644 index 000000000..66dcea18f --- /dev/null +++ b/utils/udfsdk/distinct_count.cpp @@ -0,0 +1,99 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include "distinct_count.h" + +using namespace mcsv1sdk; + +struct distinct_count_data +{ + uint64_t cnt; +}; + +#define OUT_TYPE int64_t +mcsv1_UDAF::ReturnCode distinct_count::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + context->setUserDataSize(sizeof(distinct_count_data)); + if (context->getParameterCount() != 1) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("avgx() with other than 1 arguments"); + return mcsv1_UDAF::ERROR; + } + context->setResultType(CalpontSystemCatalog::BIGINT); + context->setColWidth(8); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + context->setRunFlag(mcsv1sdk::UDAF_DISTINCT); + context->setRunFlag(mcsv1sdk::UDAF_OVER_REQUIRED); + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode distinct_count::reset(mcsv1Context* context) +{ + struct distinct_count_data* data = (struct distinct_count_data*)context->getUserData()->data; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode distinct_count::nextValue(mcsv1Context* context, + ColumnDatum* valsIn) +{ + static_any::any& valIn = valsIn[0].columnData; + struct distinct_count_data* data = (struct distinct_count_data*)context->getUserData()->data; + + if (valIn.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + data->cnt++; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode distinct_count::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + struct distinct_count_data* outData = (struct distinct_count_data*)context->getUserData()->data; + struct distinct_count_data* inData = (struct distinct_count_data*)userDataIn->data; + outData->cnt += inData->cnt; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode distinct_count::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct distinct_count_data* data = (struct distinct_count_data*)context->getUserData()->data; + valOut = data->cnt; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode distinct_count::dropValue(mcsv1Context* context, + ColumnDatum* valsDropped) +{ + static_any::any& valIn = valsDropped[0].columnData; + struct distinct_count_data* data = (struct distinct_count_data*)context->getUserData()->data; + + if (valIn.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + data->cnt--; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/udfsdk/distinct_count.h b/utils/udfsdk/distinct_count.h new file mode 100644 index 000000000..1d804eaa8 --- /dev/null +++ b/utils/udfsdk/distinct_count.h @@ -0,0 +1,222 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +/*********************************************************************** +* $Id$ +* +* distinct_count.h +***********************************************************************/ + +/** + * Columnstore interface for writing User Defined Aggregate + * Functions (UDAF) and User Defined Analytic Functions (UDAnF) + * or a function that can act as either - UDA(n)F + * + * The basic steps are: + * + * 1. Create the UDA(n)F function interface in some .h file. + * 2. Create the UDF function implementation in some .cpp file + * 3. Create the connector stub (MariaDB UDAF definition) for + * this UDF function. + * 4. build the dynamic library using all of the source. + * 5. Put the library in $COLUMNSTORE_INSTALL/lib of + * all modules + * 6. restart the Columnstore system. + * 7. notify mysqld about the new functions with commands like: + * + * CREATE AGGREGATE FUNCTION distinct_count returns INT + * soname 'libudf_mysql.so'; + * + */ +#ifndef HEADER_distinct_count +#define HEADER_distinct_count + +#include +#include +#include + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in context->setErrorMessage() is returned to MariaDB. 
+class distinct_count : public mcsv1_UDAF +{ +public: + // Defaults OK + distinct_count() : mcsv1_UDAF(){}; + virtual ~distinct_count(){}; + + /** + * init() + * + * Mandatory. Implement this to initialize flags and instance + * data. Called once per SQL statement. You can do any sanity + * checks here. + * + * colTypes (in) - A vector of ColDataType defining the + * parameters of the UDA(n)F call. These can be used to decide + * to override the default return type. If desired, the new + * return type can be set by context->setReturnType() and + * decimal precision can be set in context-> + * setResultDecimalCharacteristics. + * + * Return mcsv1_UDAF::ERROR on any error, such as non-compatible + * colTypes or wrong number of arguments. Else return + * mcsv1_UDAF::SUCCESS. + */ + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); + + /** + * reset() + * + * Mandatory. Reset the UDA(n)F for a new group, partition or, + * in some cases, new Window Frame. Do not free any memory + * allocated by context->setUserDataSize(). The SDK Framework owns + * that memory and will handle that. Use this opportunity to + * reset any variables in context->getUserData() needed for the + * next aggregation. May be called multiple times if running in + * a ditributed fashion. + * + * Use this opportunity to initialize the userData. + */ + virtual ReturnCode reset(mcsv1Context* context); + + /** + * nextValue() + * + * Mandatory. Handle a single row. + * + * colsIn - A vector of data structure describing the input + * data. + * + * This function is called once for every row in the filtered + * result set (before aggregation). It is very important that + * this function is efficient. + * + * If the UDAF is running in a distributed fashion, nextValue + * cannot depend on order, as it will only be called for each + * row found on the specific PM. + * + * valsIn (in) - a vector of the parameters from the row. 
+ */ + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + /** + * subEvaluate() + * + * Mandatory -- Called if the UDAF is running in a distributed + * fashion. Columnstore tries to run all aggregate functions + * distributed, depending on context. + * + * Perform an aggregation on rows partially aggregated by + * nextValue. Columnstore calls nextValue for each row on a + * given PM for a group (GROUP BY). subEvaluate is called on the + * UM to consolodate those values into a single instance of + * userData. Keep your aggregated totals in context's userData. + * The first time this is called for a group, reset() would have + * been called with this version of userData. + * + * Called for every partial data set in each group in GROUP BY. + * + * When subEvaluate has been called for all subAggregated data + * sets, Evaluate will be called with the same context as here. + * + * valIn (In) - This is a pointer to a memory block of the size + * set in setUserDataSize. It will contain the value of userData + * as seen in the last call to NextValue for a given PM. + * + */ + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn); + + /** + * evaluate() + * + * Mandatory. Get the aggregated value. + * + * Called for every new group if UDAF GROUP BY, UDAnF partition + * or, in some cases, new Window Frame. + * + * Set the aggregated value into valOut. The datatype is assumed + * to be the same as that set in the init() function; + * + * If the UDAF is running in a distributed fashion, evaluate is + * called after a series of subEvaluate calls. + * + * valOut (out) - Set the aggregated value here. The datatype is + * assumed to be the same as that set in the init() function; + * + * To return a NULL value, don't assign to valOut. 
+ */ + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + /** + * dropValue() + * + * Optional -- If defined, the server will call this instead of + * reset for UDAnF. + * + * Don't implement if a UDAnF has one or more of the following: + * The UDAnF can't be used with a Window Frame + * The UDAnF is not reversable in some way + * The UDAnF is not interested in optimal performance + * + * If not implemented, reset() followed by a series of + * nextValue() will be called for each movement of the Window + * Frame. + * + * If implemented, then each movement of the Window Frame will + * result in dropValue() being called for each row falling out + * of the Frame and nextValue() being called for each new row + * coming into the Frame. + * + * valsDropped (in) - a vector of the parameters from the row + * leaving the Frame + * + * dropValue() will not be called for unbounded/current row type + * frames, as those are already optimized. + */ + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: + +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_distinct_count.h + diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index 9e4596440..9d513ced2 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -31,14 +31,21 @@ using namespace mcsv1sdk; * This is a temporary kludge until we get the library loader * task complete */ -UDAF_MAP UDAFMap::fm; #include "allnull.h" #include "ssq.h" +#include "median.h" #include "avg_mode.h" -#include "regr_avgx.h" #include "avgx.h" + +UDAF_MAP& UDAFMap::fm() +{ + static UDAF_MAP* m = new UDAF_MAP; + return *m; +} + UDAF_MAP& UDAFMap::getMap() { + UDAF_MAP& fm = UDAFMap::fm(); if (fm.size() > 0) { return fm; @@ -51,8 +58,8 @@ UDAF_MAP& UDAFMap::getMap() // the function names passed to the interface is always in lower case. 
fm["allnull"] = new allnull(); fm["ssq"] = new ssq(); +// fm["median"] = new median(); fm["avg_mode"] = new avg_mode(); - fm["regr_avgx"] = new regr_avgx(); fm["avgx"] = new avgx(); return fm; diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h index e09228d77..ec0d0cb79 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -108,7 +108,7 @@ public: static EXPORT UDAF_MAP& getMap(); private: - static UDAF_MAP fm; + static UDAF_MAP& fm(); }; /** @@ -189,6 +189,7 @@ static uint64_t UDAF_WINDOWFRAME_REQUIRED __attribute__ ((unused)) = 1 << 4; // static uint64_t UDAF_WINDOWFRAME_ALLOWED __attribute__ ((unused)) = 1 << 5; // If used as UDAnF, a WINDOW FRAME is optional static uint64_t UDAF_MAYBE_NULL __attribute__ ((unused)) = 1 << 6; // If UDA(n)F might return NULL. static uint64_t UDAF_IGNORE_NULLS __attribute__ ((unused)) = 1 << 7; // If UDA(n)F wants NULL rows suppressed. +static uint64_t UDAF_DISTINCT __attribute__ ((unused)) = 1 << 8; // Force UDA(n)F to be distinct on first param. // Flags set by the framework to define the context of the call. 
// User code shouldn't use these directly @@ -380,6 +381,7 @@ private: std::string functionName; mcsv1sdk::mcsv1_UDAF* func; int32_t fParamCount; + std::vector paramKeys; public: // For use by the framework @@ -402,6 +404,7 @@ public: EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const; EXPORT boost::shared_ptr getUserDataSP(); EXPORT void setParamCount(int32_t paramCount); + std::vector* getParamKeys(); }; // Since aggregate functions can operate on any data type, we use the following structure @@ -605,6 +608,9 @@ public: virtual ReturnCode createUserData(UserData*& userdata, int32_t& length); protected: + // some handy conversion routines + template + T convertAnyTo(static_any::any&); // These are handy for testing the actual type of static_any static const static_any::any& charTypeId; static const static_any::any& scharTypeId; @@ -947,6 +953,11 @@ inline void mcsv1Context::setParamCount(int32_t paramCount) fParamCount = paramCount; } +inline std::vector* mcsv1Context::getParamKeys() +{ + return ¶mKeys; +} + inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { return NOT_IMPLEMENTED; @@ -959,6 +970,68 @@ inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::createUserData(UserData*& userData, in return SUCCESS; } + + +// Handy helper functions +template +inline T mcsv1_UDAF::convertAnyTo(static_any::any& valIn) +{ + T val; + if (valIn.compatible(longTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(charTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(scharTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(shortTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(intTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(llTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ucharTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ushortTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(uintTypeId)) + { + val = 
valIn.cast(); + } + else if (valIn.compatible(ulongTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ullTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(floatTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(doubleTypeId)) + { + val = valIn.cast(); + } + return val; +} + }; // namespace mcssdk #undef EXPORT diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp index 1c0fee1db..60da18a43 100644 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -349,6 +349,78 @@ extern "C" return data->sumsq; } +//======================================================================= + + /** + * MEDIAN connector stub + */ +#ifdef _MSC_VER + __declspec(dllexport) +#endif + my_bool median_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + if (args->arg_count != 1) + { + strcpy(message, "median() requires one argument"); + return 1; + } + + /* + if (!(data = (struct ssq_data*) malloc(sizeof(struct ssq_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumsq = 0; + + initid->ptr = (char*)data; + */ + return 0; + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + void median_deinit(UDF_INIT* initid) + { +// free(initid->ptr); + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + void + median_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { +// struct ssq_data* data = (struct ssq_data*)initid->ptr; +// data->sumsq = 0; + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + void + median_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { +// struct ssq_data* data = (struct ssq_data*)initid->ptr; +// double val = cvtArgToDouble(args->arg_type[0], args->args[0]); +// data->sumsq = val*val; + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + long long median(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error 
__attribute__((unused))) + { +// struct ssq_data* data = (struct ssq_data*)initid->ptr; +// return data->sumsq; + return 0; + } + /** * avg_mode connector stub */ @@ -422,167 +494,83 @@ extern "C" //======================================================================= /** - * regr_avgx connector stub - */ - struct regr_avgx_data - { - double sumx; - int64_t cnt; - }; - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) - { - struct regr_avgx_data* data; - - if (args->arg_count != 2) - { - strcpy(message, "regr_avgx() requires two arguments"); - return 1; - } - - if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) - { - strmov(message, "Couldn't allocate memory"); - return 1; - } - - data->sumx = 0; - data->cnt = 0; - - initid->ptr = (char*)data; - return 0; - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - void regr_avgx_deinit(UDF_INIT* initid) - { - free(initid->ptr); - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - void - regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), - char* message __attribute__((unused))) - { - struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; - data->sumx = 0; - data->cnt = 0; - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - void - regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args, - char* is_null, - char* message __attribute__((unused))) - { - // TODO test for NULL in x and y - struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; - double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); - ++data->cnt; - data->sumx += xval; - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), - char* is_null, char* error __attribute__((unused))) - { - struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; - return data->sumx / data->cnt; - } - 
-//======================================================================= - - /** - * avgx connector stub. Exactly the same functionality as the - * built in avg() function. Use to test the performance of the - * API + * avgx connector stub. Exactly the same functionality as the + * built in avg() function. Use to test the performance of the + * API */ struct avgx_data { - double sumx; - int64_t cnt; + double sumx; + int64_t cnt; }; - -#ifdef _MSC_VER + + #ifdef _MSC_VER __declspec(dllexport) -#endif + #endif my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) { - struct avgx_data* data; + struct avgx_data* data; + if (args->arg_count != 1) + { + strcpy(message,"avgx() requires one argument"); + return 1; + } - if (args->arg_count != 1) - { - strcpy(message, "avgx() requires one argument"); - return 1; - } - - if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) - { - strmov(message, "Couldn't allocate memory"); - return 1; - } - - data->sumx = 0; + if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumx = 0; data->cnt = 0; - initid->ptr = (char*)data; - return 0; + initid->ptr = (char*)data; + return 0; } -#ifdef _MSC_VER + #ifdef _MSC_VER __declspec(dllexport) -#endif + #endif void avgx_deinit(UDF_INIT* initid) { - free(initid->ptr); - } + free(initid->ptr); + } -#ifdef _MSC_VER + #ifdef _MSC_VER __declspec(dllexport) -#endif + #endif void avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), - char* message __attribute__((unused))) + char* message __attribute__((unused))) { - struct avgx_data* data = (struct avgx_data*)initid->ptr; - data->sumx = 0; + struct avgx_data* data = (struct avgx_data*)initid->ptr; + data->sumx = 0; data->cnt = 0; } -#ifdef _MSC_VER + #ifdef _MSC_VER __declspec(dllexport) -#endif + #endif void avgx_add(UDF_INIT* initid, UDF_ARGS* args, - char* is_null, - char* message __attribute__((unused))) + char* 
is_null, + char* message __attribute__((unused))) { // TODO test for NULL in x and y - struct avgx_data* data = (struct avgx_data*)initid->ptr; - double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + struct avgx_data* data = (struct avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); ++data->cnt; - data->sumx += xval; + data->sumx += xval; } -#ifdef _MSC_VER + #ifdef _MSC_VER __declspec(dllexport) -#endif + #endif long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), - char* is_null, char* error __attribute__((unused))) + char* is_null, char* error __attribute__((unused))) { - struct avgx_data* data = (struct avgx_data*)initid->ptr; - return data->sumx / data->cnt; + struct avgx_data* data = (struct avgx_data*)initid->ptr; + return data->sumx / data->cnt; } } // vim:ts=4 sw=4: diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index fe1f3fd0e..1096f8431 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -207,7 +207,6 @@ - @@ -220,7 +219,6 @@ - diff --git a/utils/utils.vpj b/utils/utils.vpj index 53da962f3..d81586008 100755 --- a/utils/utils.vpj +++ b/utils/utils.vpj @@ -292,6 +292,8 @@ Filters="*.bmp"/> + Filters=""> + + diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index 79ed61b52..f9e38d9a1 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -53,69 +53,11 @@ using namespace joblist; namespace windowfunction { -template -boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) +boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) { boost::shared_ptr func; - switch (ct) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case 
CalpontSystemCatalog::DECIMAL: - { - func.reset(new WF_udaf(id, name, context)); - break; - } - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - case CalpontSystemCatalog::UDECIMAL: - { - func.reset(new WF_udaf(id, name, context)); - break; - } - - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - { - func.reset(new WF_udaf(id, name, context)); - break; - } - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - { - func.reset(new WF_udaf(id, name, context)); - break; - } - - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::VARBINARY: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::BLOB: - { - func.reset(new WF_udaf(id, name, context)); - break; - } - - default: - { - string errStr = name + "(" + colType2String[ct] + ")"; - errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); - cerr << errStr << endl; - throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); - - break; - } - } + func.reset(new WF_udaf(id, name, context)); // Get the UDAnF function object WF_udaf* wfUDAF = (WF_udaf*)func.get(); @@ -125,30 +67,26 @@ boost::shared_ptr WF_udaf::makeFunction(int id, const str return func; } -template -WF_udaf::WF_udaf(WF_udaf& rhs) : fUDAFContext(rhs.getContext()), +WF_udaf::WF_udaf(WF_udaf& rhs) : fUDAFContext(rhs.getContext()), bInterrupted(rhs.getInterrupted()), fDistinct(rhs.getDistinct()) { getContext().setInterrupted(getInterruptedPtr()); } -template -WindowFunctionType* WF_udaf::clone() const +WindowFunctionType* WF_udaf::clone() const { return new WF_udaf(*const_cast(this)); } -template -void WF_udaf::resetData() +void WF_udaf::resetData() { getContext().getFunction()->reset(&getContext()); - fDistinctSet.clear(); + fDistinctMap.clear(); WindowFunctionType::resetData(); } -template -void 
WF_udaf::parseParms(const std::vector& parms) +void WF_udaf::parseParms(const std::vector& parms) { bRespectNulls = true; // The last parms: respect null | ignore null @@ -156,10 +94,13 @@ void WF_udaf::parseParms(const std::vector& parms) idbassert(cc != NULL); bool isNull = false; // dummy, harded coded bRespectNulls = (cc->getIntVal(fRow, isNull) > 0); + if (getContext().getRunFlag(mcsv1sdk::UDAF_DISTINCT)) + { + setDistinct(); + } } -template -bool WF_udaf::dropValues(int64_t b, int64_t e) +bool WF_udaf::dropValues(int64_t b, int64_t e) { if (!bHasDropValue) { @@ -168,6 +109,7 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) } mcsv1sdk::mcsv1_UDAF::ReturnCode rc; + bool isNull = false; // Turn on the Analytic flag so the function is aware it is being called // as a Window Function. @@ -175,14 +117,26 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) // Put the parameter metadata (type, scale, precision) into valsIn mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + ConstantColumn* cc = NULL; for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) { - uint64_t colIn = fFieldIndex[i + 1]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colIn); + cc = static_cast(fConstantParms[i].get()); + + if (cc) + { + datum.dataType = cc->resultType().colDataType; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + uint64_t colIn = fFieldIndex[i + 1]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } } for (int64_t i = b; i < e; i++) @@ -190,52 +144,326 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) if (i % 1000 == 0 && fStep->cancelled()) break; - bool bHasNull = false; fRow.setData(getPointer(fRowData->at(i))); - // Turn on NULL flags + + // NULL flags uint32_t flags[getContext().getParameterCount()]; + 
bool bSkipIt = false; for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { + cc = static_cast(fConstantParms[k].get()); uint64_t colIn = fFieldIndex[k + 1]; mcsv1sdk::ColumnDatum& datum = valsIn[k]; - flags[k] = 0; - if (fRow.isNullValue(colIn) == true) + // Turn on Null flags or skip based on respect nulls + flags[k] = 0; + if ((!cc && fRow.isNullValue(colIn) == true) + || (cc && cc->type() == ConstantColumn::NULLDATA)) { if (!bRespectNulls) { - bHasNull = true; + bSkipIt = true; break; } flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. - // Currently, distinct only works for param 1 - if (k == 0) + if (!bSkipIt && !(flags[k] & mcsv1sdk::PARAM_IS_NULL)) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + switch (datum.dataType) { - continue; + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + { + int64_t valIn; + + if (cc) + { + valIn = cc->getIntVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if (fDistinct) + { + DistinctMap::iterator distinct; + distinct = fDistinctMap.find(valIn); + if (distinct != fDistinctMap.end()) + { + // This is a duplicate: decrement the count + --(*distinct).second; + if ((*distinct).second > 0) // still more of these + { + bSkipIt = true; + continue; + } + else + { + fDistinctMap.erase(distinct); + } + } + } + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + { + int64_t valIn; + + if (cc) + { + valIn = cc->getDecimalVal(fRow, isNull).value; + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. 
+ // Currently, distinct only works on the first parameter. + if (k == 0) + { + if (fDistinct) + { + DistinctMap::iterator distinct; + distinct = fDistinctMap.find(valIn); + if (distinct != fDistinctMap.end()) + { + // This is a duplicate: decrement the count + --(*distinct).second; + if ((*distinct).second > 0) // still more of these + { + bSkipIt = true; + continue; + } + else + { + fDistinctMap.erase(distinct); + } + } + } + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::TIME: + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + { + uint64_t valIn; + + if (cc) + { + valIn = cc->getUintVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if (fDistinct) + { + DistinctMap::iterator distinct; + distinct = fDistinctMap.find(valIn); + if (distinct != fDistinctMap.end()) + { + // This is a duplicate: decrement the count + --(*distinct).second; + if ((*distinct).second > 0) // still more of these + { + bSkipIt = true; + continue; + } + else + { + fDistinctMap.erase(distinct); + } + } + } + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + double valIn; + + if (cc) + { + valIn = cc->getDoubleVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if (fDistinct) + { + DistinctMap::iterator distinct; + distinct = fDistinctMap.find(valIn); + if (distinct != fDistinctMap.end()) + { + // This is a duplicate: decrement the count + --(*distinct).second; + if ((*distinct).second > 0) // still more of these + { + bSkipIt = true; + continue; + } + else + { + fDistinctMap.erase(distinct); + } + } + } + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + float valIn; + + if (cc) + { + valIn = cc->getFloatVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if (fDistinct) + { + DistinctMap::iterator distinct; + distinct = fDistinctMap.find(valIn); + if (distinct != fDistinctMap.end()) + { + // This is a duplicate: decrement the count + --(*distinct).second; + if ((*distinct).second > 0) // still more of these + { + bSkipIt = true; + continue; + } + else + { + fDistinctMap.erase(distinct); + } + } + } + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + + if (cc) + { + valIn = cc->getStrVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if (fDistinct) + { + DistinctMap::iterator distinct; + distinct = fDistinctMap.find(valIn); + if (distinct != fDistinctMap.end()) + { + // This is a duplicate: decrement the count + --(*distinct).second; + if ((*distinct).second > 0) // still more of these + { + bSkipIt = true; + continue; + } + else + { + fDistinctMap.erase(distinct); + } + } + } + } + + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[(int)datum.dataType] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } } - - if (fDistinct) - fDistinctSet.insert(valIn); } - - datum.columnData = valIn; } - if (bHasNull) + if (bSkipIt) { continue; } + getContext().setDataFlags(flags); + rc = getContext().getFunction()->dropValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED) @@ -257,8 +485,7 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) } // Sets the value from valOut into column colOut, performing any conversions. -template -void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, +void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, int64_t b, int64_t e, int64_t c) { static const static_any::any& charTypeId = (char)1; @@ -279,15 +506,6 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, CDT colDataType = fRow.getColType(colOut); - if (valOut.empty()) - { - // If valOut is empty, we return NULL - T* pv = NULL; - setValue(colDataType, b, e, c, pv); - fPrev = c; - return; - } - // This may seem a bit convoluted. 
Users shouldn't return a type // that they didn't set in mcsv1_UDAF::init(), but this // handles whatever return type is given and casts @@ -405,7 +623,14 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, case execplan::CalpontSystemCatalog::BIGINT: case execplan::CalpontSystemCatalog::DECIMAL: case execplan::CalpontSystemCatalog::UDECIMAL: - setValue(colDataType, b, e, c, &intOut); + if (valOut.empty()) + { + setValue(colDataType, b, e, c, (int64_t*)NULL); + } + else + { + setValue(colDataType, b, e, c, &intOut); + } break; case execplan::CalpontSystemCatalog::UTINYINT: @@ -416,17 +641,38 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, case execplan::CalpontSystemCatalog::DATE: case execplan::CalpontSystemCatalog::DATETIME: case execplan::CalpontSystemCatalog::TIME: - setValue(colDataType, b, e, c, &uintOut); + if (valOut.empty()) + { + setValue(colDataType, b, e, c, (uint64_t*)NULL); + } + else + { + setValue(colDataType, b, e, c, &uintOut); + } break; case execplan::CalpontSystemCatalog::FLOAT: case execplan::CalpontSystemCatalog::UFLOAT: - setValue(colDataType, b, e, c, &floatOut); + if (valOut.empty()) + { + setValue(colDataType, b, e, c, (float*)NULL); + } + else + { + setValue(colDataType, b, e, c, &floatOut); + } break; case execplan::CalpontSystemCatalog::DOUBLE: case execplan::CalpontSystemCatalog::UDOUBLE: - setValue(colDataType, b, e, c, &doubleOut); + if (valOut.empty()) + { + setValue(colDataType, b, e, c, (double*)NULL); + } + else + { + setValue(colDataType, b, e, c, &doubleOut); + } break; case execplan::CalpontSystemCatalog::CHAR: @@ -435,7 +681,14 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, case execplan::CalpontSystemCatalog::VARBINARY: case execplan::CalpontSystemCatalog::CLOB: case execplan::CalpontSystemCatalog::BLOB: - setValue(colDataType, b, e, c, &strOut); + if (valOut.empty()) + { + setValue(colDataType, b, e, c, (string*)NULL); + } + else + { + setValue(colDataType, 
b, e, c, &strOut); + } break; default: @@ -449,8 +702,7 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, } } -template -void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) +void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { mcsv1sdk::mcsv1_UDAF::ReturnCode rc; uint64_t colOut = fFieldIndex[0]; @@ -499,7 +751,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) else getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); - bool bHasNull = false; + bool bSkipIt = false; for (int64_t i = b; i <= e; i++) { @@ -510,7 +762,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // NULL flags uint32_t flags[getContext().getParameterCount()]; - bHasNull = false; + bSkipIt = false; for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { @@ -526,14 +778,14 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { if (!bRespectNulls) { - bHasNull = true; + bSkipIt = true; break; } flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - if (!bHasNull && !(flags[k] & mcsv1sdk::PARAM_IS_NULL)) + if (!bSkipIt && !(flags[k] & mcsv1sdk::PARAM_IS_NULL)) { switch (datum.dataType) { @@ -556,15 +808,23 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. - if (k == 0) + if (k == 0 && fDistinct) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + // MCOL-1698 + std::pair val = make_pair(valIn, 1); + // Unordered_map will not insert a duplicate key (valIn). + // If it doesn't insert, the original pair will be returned + // in distinct.first and distinct.second will be a bool -- + // true if newly inserted, false if a duplicate. 
+ std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) { + // This is a duplicate: increment the count + ++(*distinct.first).second; + bSkipIt = true; continue; } - - if (fDistinct) - fDistinctSet.insert(valIn); } datum.columnData = valIn; @@ -587,15 +847,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. - if (k == 0) + if (k == 0 && fDistinct) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) { + ++(*distinct.first).second; + bSkipIt = true; continue; } - - if (fDistinct) - fDistinctSet.insert(valIn); } datum.columnData = valIn; @@ -607,6 +869,9 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) case CalpontSystemCatalog::UMEDINT: case CalpontSystemCatalog::UINT: case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::TIME: + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: { uint64_t valIn; @@ -621,15 +886,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. - if (k == 0) + if (k == 0 && fDistinct) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) { + ++(*distinct.first).second; + bSkipIt = true; continue; } - - if (fDistinct) - fDistinctSet.insert(valIn); } datum.columnData = valIn; @@ -652,15 +919,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. 
- if (k == 0) + if (k == 0 && fDistinct) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) { + ++(*distinct.first).second; + bSkipIt = true; continue; } - - if (fDistinct) - fDistinctSet.insert(valIn); } datum.columnData = valIn; @@ -683,15 +952,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. - if (k == 0) + if (k == 0 && fDistinct) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) { + ++(*distinct.first).second; + bSkipIt = true; continue; } - - if (fDistinct) - fDistinctSet.insert(valIn); } datum.columnData = valIn; @@ -717,15 +988,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. 
- if (k == 0) + if (k == 0 && fDistinct) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) { + ++(*distinct.first).second; + bSkipIt = true; continue; } - - if (fDistinct) - fDistinctSet.insert(valIn); } datum.columnData = valIn; @@ -734,7 +1007,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) default: { - string errStr = "(" + colType2String[i] + ")"; + string errStr = "(" + colType2String[(int)datum.dataType] + ")"; errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); cerr << errStr << endl; throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); @@ -746,7 +1019,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) } // Skip if any value is NULL and respect nulls is off. - if (bHasNull) + if (bSkipIt) { continue; } @@ -780,8 +1053,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) fPrev = c; } -template -boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context); +boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context); } //namespace // vim:ts=4 sw=4: diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index ef2ca5853..38515285f 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -22,9 +22,9 @@ #define UTILS_WF_UDAF_H #ifndef _MSC_VER -#include +#include #else -#include +#include #endif #include "windowfunctiontype.h" #include "mcsv1_udaf.h" @@ -45,7 +45,7 @@ public: class DistinctEqual { public: - inline bool operator()(const static_any::any& lhs, static_any::any& rhs) const + inline bool operator()(const static_any::any lhs, static_any::any rhs) const { return lhs == rhs; } @@ -53,7 +53,6 @@ public: // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a 
specialization of mcsv1sdk::mcsv1_UDAF -template class WF_udaf : public WindowFunctionType { public: @@ -84,6 +83,11 @@ public: return fDistinct; } + void setDistinct(bool d = true) + { + fDistinct = d; + } + protected: void SetUDAFValue(static_any::any& valOut, int64_t colOut, int64_t b, int64_t e, int64_t c); @@ -92,8 +96,10 @@ protected: bool fDistinct; bool bRespectNulls; // respect null | ignore null bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue. - // To hold distinct values - std::tr1::unordered_set fDistinctSet; + // To hold distinct values and their counts + typedef std::tr1::unordered_map DistinctMap; + DistinctMap fDistinctMap; + static_any::any fValOut; // The return value public: diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index dfceb6364..efede3ef5 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -208,7 +208,7 @@ WindowFunctionType::makeWindowFunction(const string& name, int ct, WindowFunctio break; case WF__UDAF: - af = WF_udaf::makeFunction(functionId, name, ct, wc->getUDAFContext()); + af = WF_udaf::makeFunction(functionId, name, ct, wc->getUDAFContext()); break; case WF__REGR_SLOPE: diff --git a/writeengine/bulk/we_tableinfo.cpp b/writeengine/bulk/we_tableinfo.cpp index da28204d2..2885d1462 100644 --- a/writeengine/bulk/we_tableinfo.cpp +++ b/writeengine/bulk/we_tableinfo.cpp @@ -736,6 +736,17 @@ int TableInfo::setParseComplete(const int& columnId, #ifdef PROFILE Stats::startParseEvent(WE_STATS_FLUSH_PRIMPROC_BLOCKS); #endif + if (fLog->isDebug(DEBUG_2)) + { + ostringstream oss; + oss << "Dictionary cache flush: "; + for (uint32_t i = 0; i < fDictFlushBlks.size(); i++) + { + oss << fDictFlushBlks[i] << ", "; + } + oss << endl; + fLog->logMsg( oss.str(), MSGLVL_INFO1 ); + } cacheutils::flushPrimProcAllverBlocks(fDictFlushBlks); #ifdef PROFILE 
Stats::stopParseEvent(WE_STATS_FLUSH_PRIMPROC_BLOCKS); diff --git a/writeengine/server/we_dmlcommandproc.cpp b/writeengine/server/we_dmlcommandproc.cpp index 86625d013..057ef2504 100644 --- a/writeengine/server/we_dmlcommandproc.cpp +++ b/writeengine/server/we_dmlcommandproc.cpp @@ -2229,12 +2229,13 @@ uint8_t WE_DMLCommandProc::commitBatchAutoOn(messageqcpp::ByteStream& bs, std::s { std::set::iterator lbidIter; std::vector dictFlushBlks; - + cerr << "API Flushing blocks: "; for (lbidIter = (*mapIter).second.begin(); lbidIter != (*mapIter).second.end(); lbidIter++) { + cerr << *lbidIter << ", "; dictFlushBlks.push_back((*lbidIter)); } - + cerr << endl; cacheutils::flushPrimProcAllverBlocks(dictFlushBlks); fWEWrapper.getDictMap().erase(txnID); } diff --git a/writeengine/shared/we_bulkrollbackmgr.cpp b/writeengine/shared/we_bulkrollbackmgr.cpp index 75d9de8c9..e9872da50 100644 --- a/writeengine/shared/we_bulkrollbackmgr.cpp +++ b/writeengine/shared/we_bulkrollbackmgr.cpp @@ -198,14 +198,16 @@ int BulkRollbackMgr::rollback ( bool keepMetaFile ) // the user but keep going. 
std::vector allOIDs; std::set::const_iterator iter = fAllColDctOIDs.begin(); - + cerr << "Rollback flushing: "; while (iter != fAllColDctOIDs.end()) { + cerr << *iter << ", "; //std::cout << "Flushing OID from PrimProc cache " << *iter << // std::endl; allOIDs.push_back(*iter); ++iter; } + cerr << endl; int cache_rc = cacheutils::flushOIDsFromCache( allOIDs ); diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 923871ef9..b84f3bce9 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -187,6 +187,37 @@ int WriteEngineWrapper::checkValid(const TxnID& txnid, const ColStructList& colS return NO_ERROR; } +/*@brief findSmallestColumn --Find the smallest column for this table + */ +/*********************************************************** + * DESCRIPTION: + * Find the smallest column for this table + * PARAMETERS: + * lowColLen - returns smallest column width + * colId - returns smallest column id + * colStructList - column struct list + * RETURN: + * void + ***********************************************************/ +void WriteEngineWrapper::findSmallestColumn(uint32_t& colId, ColStructList colStructList) +// MCOL-1675: find the smallest column width to calculate the RowID from so +// that all HWMs will be incremented by this operation +{ + int32_t lowColLen = 8192; + for (uint32_t colIt = 0; colIt < colStructList.size(); colIt++) + { + if (colStructList[colIt].colWidth < lowColLen) + { + colId = colIt; + lowColLen = colStructList[colId].colWidth; + if ( lowColLen == 1 ) + { + break; + } + } + } +} + /*@convertValArray - Convert interface values to internal values */ /*********************************************************** @@ -953,6 +984,11 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, for (i = 0; i < colStructList.size(); i++) Convertor::convertColType(&colStructList[i]); + uint32_t colId = 0; + // MCOL-1675: find the smallest column width to calculate the RowID 
from so + // that all HWMs will be incremented by this operation + findSmallestColumn(colId, colStructList); + // rc = checkValid(txnid, colStructList, colValueList, ridList); // if (rc != NO_ERROR) // return rc; @@ -979,8 +1015,8 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, //-------------------------------------------------------------------------- if (isFirstBatchPm) { - currentDBrootIdx = dbRootExtentTrackers[0]->getCurrentDBRootIdx(); - extentInfo = dbRootExtentTrackers[0]->getDBRootExtentList(); + currentDBrootIdx = dbRootExtentTrackers[colId]->getCurrentDBRootIdx(); + extentInfo = dbRootExtentTrackers[colId]->getDBRootExtentList(); dbRoot = extentInfo[currentDBrootIdx].fDbRoot; partitionNum = extentInfo[currentDBrootIdx].fPartition; @@ -1027,7 +1063,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, { colOp = m_colOp[op(colStructList[i].fCompressionType)]; colOp->initColumn(curCol); - colOp->setColParam(curCol, 0, colStructList[i].colWidth, colStructList[i].colDataType, + colOp->setColParam(curCol, colId, colStructList[i].colWidth, colStructList[i].colDataType, colStructList[i].colType, colStructList[i].dataOid, colStructList[i].fCompressionType, dbRoot, partitionNum, segmentNum); rc = colOp->extendColumn(curCol, false, extents[i].startBlkOffset, extents[i].startLbid, extents[i].allocSize, dbRoot, @@ -1165,7 +1201,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, } // if (isFirstBatchPm) else //get the extent info from tableMetaData { - ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[0].dataOid); + ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[colId].dataOid); ColExtsInfo::iterator it = aColExtsInfo.begin(); while (it != aColExtsInfo.end()) @@ -1201,7 +1237,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, //-------------------------------------------------------------------------- // allocate row id(s) 
//-------------------------------------------------------------------------- - curColStruct = colStructList[0]; + curColStruct = colStructList[colId]; colOp = m_colOp[op(curColStruct.fCompressionType)]; colOp->initColumn(curCol); @@ -1212,26 +1248,29 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, vector fileInfo; dbRoot = curColStruct.fColDbRoot; //use the first column to calculate row id - ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[0].dataOid); + ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[colId].dataOid); ColExtsInfo::iterator it = aColExtsInfo.begin(); while (it != aColExtsInfo.end()) { - if ((it->dbRoot == colStructList[0].fColDbRoot) && (it->partNum == colStructList[0].fColPartition) && (it->segNum == colStructList[0].fColSegment) && it->current ) + if ((it->dbRoot == colStructList[colId].fColDbRoot) && + (it->partNum == colStructList[colId].fColPartition) && + (it->segNum == colStructList[colId].fColSegment) && it->current ) + { break; - + } it++; } if (it != aColExtsInfo.end()) { hwm = it->hwm; - //cout << "Got from colextinfo hwm for oid " << colStructList[0].dataOid << " is " << hwm << " and seg is " << colStructList[0].fColSegment << endl; + //cout << "Got from colextinfo hwm for oid " << colStructList[colId].dataOid << " is " << hwm << " and seg is " << colStructList[0].fColSegment << endl; } oldHwm = hwm; //Save this info for rollback //need to pass real dbRoot, partition, and segment to setColParam - colOp->setColParam(curCol, 0, curColStruct.colWidth, curColStruct.colDataType, + colOp->setColParam(curCol, colId, curColStruct.colWidth, curColStruct.colDataType, curColStruct.colType, curColStruct.dataOid, curColStruct.fCompressionType, curColStruct.fColDbRoot, curColStruct.fColPartition, curColStruct.fColSegment); rc = colOp->openColumnFile(curCol, segFile, useTmpSuffix); // @bug 5572 HDFS tmp file @@ -1261,8 +1300,8 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, 
curCol, (uint64_t)totalRow, rowIdArray, hwm, newExtent, rowsLeft, newHwm, newFile, newColStructList, newDctnryStructList, dbRootExtentTrackers, insertSelect, true, tableOid, isFirstBatchPm); - //cout << "after allocrowid, total row = " < 256K. // if totalRow == rowsLeft, then not adding rows to 1st extent, so skip it. //-------------------------------------------------------------------------- -// DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? + // DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? if ((curCol.dataFile.fPartition == 0) && (curCol.dataFile.fSegment == 0) && ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (size_t k = 1; k < colStructList.size(); k++) + for (unsigned k=0; ksetColParam(expandCol, 0, @@ -1683,19 +1725,11 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, for (i = 0; i < colStructList.size(); i++) Convertor::convertColType(&colStructList[i]); - // MCOL-984: find the smallest column width to calculate the RowID from so + uint32_t colId = 0; + // MCOL-1675: find the smallest column width to calculate the RowID from so // that all HWMs will be incremented by this operation - int32_t lowColLen = 8192; - int32_t colId = 0; + findSmallestColumn(colId, colStructList); - for (uint32_t colIt = 0; colIt < colStructList.size(); colIt++) - { - if (colStructList[colIt].colWidth < lowColLen) - { - colId = colIt; - lowColLen = colStructList[colId].colWidth; - } - } // rc = checkValid(txnid, colStructList, colValueList, ridList); // if (rc != NO_ERROR) @@ -2019,7 +2053,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, // Expand initial abbreviated extent if any RID in 1st extent is > 256K. // if totalRow == rowsLeft, then not adding rows to 1st extent, so skip it. 
//-------------------------------------------------------------------------- -// DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? + // DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? if ((curCol.dataFile.fPartition == 0) && (curCol.dataFile.fSegment == 0) && ((totalRow - rowsLeft) > 0) && @@ -2032,7 +2066,8 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, continue; Column expandCol; - colOp = m_colOp[op(colStructList[k].fCompressionType)]; + colOp = m_colOp[op(colStructList[k].fCompressionType)]; + // Shouldn't we change 0 to colId here? colOp->setColParam(expandCol, 0, colStructList[k].colWidth, colStructList[k].colDataType, @@ -3106,6 +3141,11 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, for (i = 0; i < colStructList.size(); i++) Convertor::convertColType(&colStructList[i]); + uint32_t colId = 0; + // MCOL-1675: find the smallest column width to calculate the RowID from so + // that all HWMs will be incremented by this operation + findSmallestColumn(colId, colStructList); + rc = checkValid(txnid, colStructList, colValueList, ridList); if (rc != NO_ERROR) @@ -3124,7 +3164,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, //-------------------------------------------------------------------------- // allocate row id(s) //-------------------------------------------------------------------------- - curColStruct = colStructList[0]; + curColStruct = colStructList[colId]; colOp = m_colOp[op(curColStruct.fCompressionType)]; colOp->initColumn(curCol); @@ -3161,7 +3201,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, oldHwm = hwm; //Save this info for rollback //need to pass real dbRoot, partition, and segment to setColParam - colOp->setColParam(curCol, 0, curColStruct.colWidth, curColStruct.colDataType, + colOp->setColParam(curCol, colId, curColStruct.colWidth, curColStruct.colDataType, curColStruct.colType, 
curColStruct.dataOid, curColStruct.fCompressionType, dbRoot, partitionNum, segmentNum); @@ -3279,13 +3319,15 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, // if totalRow == rowsLeft, then not adding rows to 1st extent, so skip it. //-------------------------------------------------------------------------- // DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? - if ((colStructList[0].fColPartition == 0) && - (colStructList[0].fColSegment == 0) && + if ((colStructList[colId].fColPartition == 0) && + (colStructList[colId].fColSegment == 0) && ((totalRow - rowsLeft) > 0) && (rowIdArray[totalRow - rowsLeft - 1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (size_t k = 1; k < colStructList.size(); k++) + for (unsigned k=0; ksetColParam(expandCol, 0, diff --git a/writeengine/wrapper/writeengine.h b/writeengine/wrapper/writeengine.h index f0fe5c995..864d064b1 100644 --- a/writeengine/wrapper/writeengine.h +++ b/writeengine/wrapper/writeengine.h @@ -633,6 +633,11 @@ private: int checkValid(const TxnID& txnid, const ColStructList& colStructList, const ColValueList& colValueList, const RIDList& ridList) const; /** + * @brief Find the smallest column for this table + */ + void findSmallestColumn(uint32_t &colId, ColStructList colStructList); + + /** * @brief Convert interface column type to a internal column type */ // void convertColType(void* curStruct, const FuncType curType = FUNC_WRITE_ENGINE) const;