From 611cdb204dc5f7af6a8ab250f993b1aa0cccf71d Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 16 Aug 2018 11:17:39 -0500 Subject: [PATCH 01/32] MCOL-521 Re-do changes for MariaDB 10.3 merge --- dbcon/joblist/tupleaggregatestep.cpp | 475 +++++++++++---------------- utils/udfsdk/CMakeLists.txt | 2 +- utils/udfsdk/mcsv1_udaf.cpp | 13 +- utils/udfsdk/mcsv1_udaf.h | 2 +- utils/udfsdk/udfmysql.cpp | 242 +++++++------- utils/udfsdk/udfsdk.vpj | 2 - 6 files changed, 312 insertions(+), 424 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index da91919f0..429d5821d 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -849,10 +849,9 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) idbassert(cc != NULL); // @bug5261 bool isNull = (ConstantColumn::NULLDATA == cc->type()); - if (ac->aggOp() == ROWAGG_UDAF) + if (ac->aggOp() == AggregateColumn::UDAF) { UDAFColumn* udafc = dynamic_cast(ac); - if (udafc) { constAggDataVec.push_back( @@ -1099,7 +1098,6 @@ void TupleAggregateStep::prep1PhaseAggregate( uint32_t bigUintWidth = sizeof(uint64_t); // For UDAF uint32_t projColsUDAFIdx = 0; - uint32_t udafcParamIdx = 0; UDAFColumn* udafc = NULL; mcsv1sdk::mcsv1_UDAF* pUDAFFunc = NULL; // for count column of average function @@ -1296,12 +1294,10 @@ void TupleAggregateStep::prep1PhaseAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; - for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -1310,7 +1306,6 @@ void TupleAggregateStep::prep1PhaseAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep1PhaseAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -1489,44 +1484,11 @@ void TupleAggregateStep::prep1PhaseAggregate( 
precisionAgg.push_back(udafFuncCol->fUDAFContext.getPrecision()); typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); - // If the first param is const - udafcParamIdx = 0; - ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - - if (cc) - { - funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; - } - - ++udafcParamIdx; break; } case ROWAGG_MULTI_PARM: { - oidsAgg.push_back(oidsProj[colProj]); - keysAgg.push_back(key); - scaleAgg.push_back(scaleProj[colProj]); - precisionAgg.push_back(precisionProj[colProj]); - typeAgg.push_back(typeProj[colProj]); - widthAgg.push_back(width[colProj]); - - // If the param is const - if (udafc) - { - ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - - if (cc) - { - funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; - } - } - else - { - throw QueryDataExcept("prep1PhaseAggregate: UDAF multi function with no parms", aggregateFuncErr); - } - - ++udafcParamIdx; } break; @@ -1901,7 +1863,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -2121,12 +2082,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // If the first param is const udafcParamIdx = 0; ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; } - ++udafcParamIdx; break; } @@ -2141,12 +2100,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( widthAgg.push_back(widthProj[colProj]); multiParmIndexes.push_back(colAgg); ++colAgg; - // If the param is const if (udafc) { ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; @@ -2156,7 +2113,6 @@ void 
TupleAggregateStep::prep1PhaseDistinctAggregate( { throw QueryDataExcept("prep1PhaseDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); } - ++udafcParamIdx; } break; @@ -2208,10 +2164,9 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( groupByNoDist.push_back(groupby); aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); } - + // locate the return column position in aggregated rowgroup uint64_t outIdx = 0; - for (uint64_t i = 0; i < returnedColVec.size(); i++) { udafc = NULL; @@ -2256,19 +2211,16 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; - for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep1PhaseDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -2565,7 +2517,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( { // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colAgg, outIdx)); @@ -2609,7 +2560,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } - ++outIdx; } // for (i @@ -2860,7 +2810,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( ++multiParms; continue; } - if (returnedColVec[k].first != distinctColKey) continue; @@ -2881,7 +2830,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex - multiParms)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -2909,7 +2858,6 @@ void 
TupleAggregateStep::prep1PhaseDistinctAggregate( ++multiParms; continue; } - // search non-distinct functions in functionVec vector::iterator it = functionVec2.begin(); @@ -2925,7 +2873,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex - multiParms)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if ((f->fOutputColumnIndex == k) && @@ -2947,7 +2895,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex - multiParms)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -3183,12 +3131,10 @@ void TupleAggregateStep::prep2PhasesAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; - for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -3197,7 +3143,6 @@ void TupleAggregateStep::prep2PhasesAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -3420,12 +3365,10 @@ void TupleAggregateStep::prep2PhasesAggregate( // If the first param is const udafcParamIdx = 0; ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; } - ++udafcParamIdx; break; } @@ -3439,12 +3382,10 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); colAggPm++; - // If the param is const if (udafc) { ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; @@ -3454,7 +3395,6 @@ void 
TupleAggregateStep::prep2PhasesAggregate( { throw QueryDataExcept("prep2PhasesAggregate: UDAF multi function with no parms", aggregateFuncErr); } - ++udafcParamIdx; } break; @@ -3482,7 +3422,6 @@ void TupleAggregateStep::prep2PhasesAggregate( AGG_MAP aggDupFuncMap; projColsUDAFIdx = 0; - // copy over the groupby vector // update the outputColumnIndex if returned for (uint64_t i = 0; i < groupByPm.size(); i++) @@ -3494,7 +3433,6 @@ void TupleAggregateStep::prep2PhasesAggregate( // locate the return column position in aggregated rowgroup from PM // outIdx is i without the multi-columns, uint64_t outIdx = 0; - for (uint64_t i = 0; i < returnedColVec.size(); i++) { uint32_t retKey = returnedColVec[i].first; @@ -3511,7 +3449,6 @@ void TupleAggregateStep::prep2PhasesAggregate( // Is this a UDAF? use the function as part of the key. pUDAFFunc = NULL; udafc = NULL; - if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; @@ -3520,14 +3457,12 @@ void TupleAggregateStep::prep2PhasesAggregate( { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(3)prep2PhasesAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -3680,7 +3615,6 @@ void TupleAggregateStep::prep2PhasesAggregate( { // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if (aggOp == ROWAGG_UDAF) { funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colPm, outIdx)); @@ -3722,7 +3656,6 @@ void TupleAggregateStep::prep2PhasesAggregate( if (returnedColVec[i].second == AggregateColumn::AVG) avgFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } - ++outIdx; } @@ -4067,12 +4000,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; - for (; it != jobInfo.projectionCols.end(); 
it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); @@ -4081,7 +4012,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(1)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -4300,12 +4230,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // If the first param is const udafcParamIdx = 0; ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; } - ++udafcParamIdx; break; } @@ -4320,12 +4248,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( widthAggPm.push_back(width[colProj]); multiParmIndexes.push_back(colAggPm); colAggPm++; - // If the param is const if (udafc) { ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); - if (cc) { funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; @@ -4335,7 +4261,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { throw QueryDataExcept("prep2PhasesDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); } - ++udafcParamIdx; } break; @@ -4378,17 +4303,15 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (funcPm->fAggFunction == ROWAGG_UDAF) { RowUDAFFunctionCol* udafFuncCol = dynamic_cast(funcPm.get()); - if (!udafFuncCol) { - throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); + throw logic_error("(3)prep2PhasesDistinctAggregate: A UDAF function is called but there's no RowUDAFFunctionCol"); } - funct.reset(new RowUDAFFunctionCol( udafFuncCol->fUDAFContext, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fOutputColumnIndex - multiParms, - udafFuncCol->fAuxColumnIndex - multiParms)); + udafFuncCol->fOutputColumnIndex-multiParms, + udafFuncCol->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); 
pUDAFFunc = udafFuncCol->fUDAFContext.getFunction(); } @@ -4398,8 +4321,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funcPm->fAggFunction, funcPm->fStatsFunction, funcPm->fOutputColumnIndex, - funcPm->fOutputColumnIndex - multiParms, - funcPm->fAuxColumnIndex - multiParms)); + funcPm->fOutputColumnIndex-multiParms, + funcPm->fAuxColumnIndex-multiParms)); functionNoDistVec.push_back(funct); pUDAFFunc = NULL; } @@ -4412,7 +4335,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { continue; } - oidsAggUm.push_back(oidsAggPm[idx]); keysAggUm.push_back(keysAggPm[idx]); scaleAggUm.push_back(scaleAggPm[idx]); @@ -4449,7 +4371,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // locate the return column position in aggregated rowgroup from PM // outIdx is i without the multi-columns, uint64_t outIdx = 0; - for (uint64_t i = 0; i < returnedColVec.size(); i++) { pUDAFFunc = NULL; @@ -4470,19 +4391,16 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_UDAF) { std::vector::iterator it = jobInfo.projectionCols.begin() + projColsUDAFIdx; - for (; it != jobInfo.projectionCols.end(); it++) { udafc = dynamic_cast((*it).get()); projColsUDAFIdx++; - if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); break; } } - if (it == jobInfo.projectionCols.end()) { throw logic_error("(4)prep2PhasesDistinctAggregate: A UDAF function is called but there\'s not enough UDAFColumns"); @@ -4496,241 +4414,222 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second; - } - else - { - ostringstream emsg; - emsg << "'" << jobInfo.keyInfo->tupleKeyToName[retKey] << "' isn't in tuple."; - cerr << "prep2PhasesDistinctAggregate: distinct " << emsg.str() - << " oid=" << (int) jobInfo.keyInfo->tupleKeyVec[retKey].fId - << ", alias=" << jobInfo.keyInfo->tupleKeyVec[retKey].fTable; - - if (jobInfo.keyInfo->tupleKeyVec[retKey].fView.length() > 0) - cerr << ", view=" << 
jobInfo.keyInfo->tupleKeyVec[retKey].fView; - - cerr << endl; - throw QueryDataExcept(emsg.str(), aggregateFuncErr); + colUm = it->second - multiParms; } } - switch (aggOp) + if (colUm > -1) // Means we found a DISTINCT and have a column number { - case ROWAGG_DISTINCT_AVG: - - //avgFuncMap.insert(make_pair(key, funct)); - case ROWAGG_DISTINCT_SUM: + switch (aggOp) { - if (typeAggUm[colUm] == CalpontSystemCatalog::CHAR || - typeAggUm[colUm] == CalpontSystemCatalog::VARCHAR || - typeAggUm[colUm] == CalpontSystemCatalog::BLOB || - typeAggUm[colUm] == CalpontSystemCatalog::TEXT || - typeAggUm[colUm] == CalpontSystemCatalog::DATE || - typeAggUm[colUm] == CalpontSystemCatalog::DATETIME || - typeAggUm[colUm] == CalpontSystemCatalog::TIME) + case ROWAGG_DISTINCT_AVG: + + //avgFuncMap.insert(make_pair(key, funct)); + case ROWAGG_DISTINCT_SUM: { - Message::Args args; - args.add("sum/average"); - args.add(colTypeIdString(typeAggUm[colUm])); - string emsg = IDBErrorInfo::instance()-> - errorMsg(ERR_AGGREGATE_TYPE_NOT_SUPPORT, args); - cerr << "prep2PhasesDistinctAggregate: " << emsg << endl; - throw IDBExcept(emsg, ERR_AGGREGATE_TYPE_NOT_SUPPORT); + if (typeAggUm[colUm] == CalpontSystemCatalog::CHAR || + typeAggUm[colUm] == CalpontSystemCatalog::VARCHAR || + typeAggUm[colUm] == CalpontSystemCatalog::BLOB || + typeAggUm[colUm] == CalpontSystemCatalog::TEXT || + typeAggUm[colUm] == CalpontSystemCatalog::DATE || + typeAggUm[colUm] == CalpontSystemCatalog::DATETIME || + typeAggUm[colUm] == CalpontSystemCatalog::TIME) + { + Message::Args args; + args.add("sum/average"); + args.add(colTypeIdString(typeAggUm[colUm])); + string emsg = IDBErrorInfo::instance()-> + errorMsg(ERR_AGGREGATE_TYPE_NOT_SUPPORT, args); + cerr << "prep2PhasesDistinctAggregate: " << emsg << endl; + throw IDBExcept(emsg, ERR_AGGREGATE_TYPE_NOT_SUPPORT); + } + + oidsAggDist.push_back(oidsAggUm[colUm]); + keysAggDist.push_back(retKey); + + if (typeAggUm[colUm] != CalpontSystemCatalog::DOUBLE && + typeAggUm[colUm] 
!= CalpontSystemCatalog::FLOAT) + { + if (isUnsigned(typeAggUm[colUm])) + { + typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + precisionAggDist.push_back(20); + } + else + { + typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + precisionAggDist.push_back(19); + } + + uint32_t scale = scaleAggUm[colUm]; + + // for int average, FE expects a decimal + if (aggOp == ROWAGG_DISTINCT_AVG) + scale = jobInfo.scaleOfAvg[retKey]; // scale += 4; + + scaleAggDist.push_back(scale); + widthAggDist.push_back(bigIntWidth); + } + else + { + typeAggDist.push_back(typeAggUm[colUm]); + scaleAggDist.push_back(scaleAggUm[colUm]); + precisionAggDist.push_back(precisionAggUm[colUm]); + widthAggDist.push_back(widthAggUm[colUm]); + } } + // PM: put the count column for avg next to the sum + // let fall through to add a count column for average function + //if (aggOp != ROWAGG_DISTINCT_AVG) + break; - oidsAggDist.push_back(oidsAggUm[colUm]); - keysAggDist.push_back(retKey); - - if (typeAggUm[colUm] != CalpontSystemCatalog::DOUBLE && - typeAggUm[colUm] != CalpontSystemCatalog::FLOAT) + case ROWAGG_COUNT_DISTINCT_COL_NAME: { + oidsAggDist.push_back(oidsAggUm[colUm]); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(0); + // work around count() in select subquery + precisionAggDist.push_back(9999); + if (isUnsigned(typeAggUm[colUm])) { typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); - precisionAggDist.push_back(20); } else { typeAggDist.push_back(CalpontSystemCatalog::BIGINT); - precisionAggDist.push_back(19); } - uint32_t scale = scaleAggUm[colUm]; - - // for int average, FE expects a decimal - if (aggOp == ROWAGG_DISTINCT_AVG) - scale = jobInfo.scaleOfAvg[retKey]; // scale += 4; - - scaleAggDist.push_back(scale); widthAggDist.push_back(bigIntWidth); } - else - { - typeAggDist.push_back(typeAggUm[colUm]); - scaleAggDist.push_back(scaleAggUm[colUm]); - precisionAggDist.push_back(precisionAggUm[colUm]); - widthAggDist.push_back(widthAggUm[colUm]); - } - } - // PM: put the 
count column for avg next to the sum - // let fall through to add a count column for average function - //if (aggOp != ROWAGG_DISTINCT_AVG) - break; + break; - case ROWAGG_COUNT_DISTINCT_COL_NAME: + default: + // cound happen if agg and agg distinct use same column. + colUm = -1; + break; + } // switch + } + // For non distinct aggregates + if (colUm == -1) + { + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + + if (it != aggFuncMap.end()) { + colUm = it->second - multiParms; oidsAggDist.push_back(oidsAggUm[colUm]); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(0); - // work around count() in select subquery - precisionAggDist.push_back(9999); - - if (isUnsigned(typeAggUm[colUm])) - { - typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); - } - else - { - typeAggDist.push_back(CalpontSystemCatalog::BIGINT); - } - - widthAggDist.push_back(bigIntWidth); + keysAggDist.push_back(keysAggUm[colUm]); + scaleAggDist.push_back(scaleAggUm[colUm]); + precisionAggDist.push_back(precisionAggUm[colUm]); + typeAggDist.push_back(typeAggUm[colUm]); + widthAggDist.push_back(widthAggUm[colUm]); + colUm -= multiParms; } - break; - case ROWAGG_MIN: - case ROWAGG_MAX: - case ROWAGG_SUM: - case ROWAGG_AVG: - case ROWAGG_COUNT_ASTERISK: - case ROWAGG_COUNT_COL_NAME: - case ROWAGG_STATS: - case ROWAGG_BIT_AND: - case ROWAGG_BIT_OR: - case ROWAGG_BIT_XOR: - case ROWAGG_CONSTANT: - default: + // not a direct hit -- a returned column is not already in the RG from PMs + else { - AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + bool returnColMissing = true; - if (it != aggFuncMap.end()) + // check if a SUM or COUNT covered by AVG + if (aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) { - colUm = it->second; - oidsAggDist.push_back(oidsAggUm[colUm]); - keysAggDist.push_back(keysAggUm[colUm]); - scaleAggDist.push_back(scaleAggUm[colUm]); - precisionAggDist.push_back(precisionAggUm[colUm]); - 
typeAggDist.push_back(typeAggUm[colUm]); - widthAggDist.push_back(widthAggUm[colUm]); - colUm -= multiParms; - } + it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc)); - // not a direct hit -- a returned column is not already in the RG from PMs - else - { - bool returnColMissing = true; - - // check if a SUM or COUNT covered by AVG - if (aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) + if (it != aggFuncMap.end()) { - it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc)); + // false alarm + returnColMissing = false; - if (it != aggFuncMap.end()) + colUm = it->second - multiParms; + + if (aggOp == ROWAGG_SUM) { - // false alarm - returnColMissing = false; + oidsAggDist.push_back(oidsAggUm[colUm]); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(scaleAggUm[colUm] >> 8); + precisionAggDist.push_back(precisionAggUm[colUm]); + typeAggDist.push_back(typeAggUm[colUm]); + widthAggDist.push_back(widthAggUm[colUm]); + } + else + { + // leave the count() to avg + aggOp = ROWAGG_COUNT_NO_OP; - colUm = it->second; - - if (aggOp == ROWAGG_SUM) + oidsAggDist.push_back(oidsAggUm[colUm]); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(0); + if (isUnsigned(typeAggUm[colUm])) { - oidsAggDist.push_back(oidsAggUm[colUm]); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(scaleAggUm[colUm] >> 8); - precisionAggDist.push_back(precisionAggUm[colUm]); - typeAggDist.push_back(typeAggUm[colUm]); - widthAggDist.push_back(widthAggUm[colUm]); + precisionAggDist.push_back(20); + typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); } else { - // leave the count() to avg - aggOp = ROWAGG_COUNT_NO_OP; - - oidsAggDist.push_back(oidsAggUm[colUm]); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(0); - - if (isUnsigned(typeAggUm[colUm])) - { - precisionAggDist.push_back(20); - typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); - } - else - { - precisionAggDist.push_back(19); - 
typeAggDist.push_back(CalpontSystemCatalog::BIGINT); - } - - widthAggDist.push_back(bigIntWidth); + precisionAggDist.push_back(19); + typeAggDist.push_back(CalpontSystemCatalog::BIGINT); } + widthAggDist.push_back(bigIntWidth); } } - else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), - retKey) != jobInfo.expressionVec.end()) - { - // a function on aggregation - TupleInfo ti = getTupleInfo(retKey, jobInfo); - oidsAggDist.push_back(ti.oid); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(ti.scale); - precisionAggDist.push_back(ti.precision); - typeAggDist.push_back(ti.dtype); - widthAggDist.push_back(ti.width); + } + else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), + retKey) != jobInfo.expressionVec.end()) + { + // a function on aggregation + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggDist.push_back(ti.oid); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(ti.scale); + precisionAggDist.push_back(ti.precision); + typeAggDist.push_back(ti.dtype); + widthAggDist.push_back(ti.width); - returnColMissing = false; - } - else if (jobInfo.windowSet.find(retKey) != jobInfo.windowSet.end()) - { - // a window function - TupleInfo ti = getTupleInfo(retKey, jobInfo); - oidsAggDist.push_back(ti.oid); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(ti.scale); - precisionAggDist.push_back(ti.precision); - typeAggDist.push_back(ti.dtype); - widthAggDist.push_back(ti.width); + returnColMissing = false; + } + else if (jobInfo.windowSet.find(retKey) != jobInfo.windowSet.end()) + { + // a window function + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggDist.push_back(ti.oid); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(ti.scale); + precisionAggDist.push_back(ti.precision); + typeAggDist.push_back(ti.dtype); + widthAggDist.push_back(ti.width); - returnColMissing = false; - } - else if (aggOp == ROWAGG_CONSTANT) - { - TupleInfo ti = getTupleInfo(retKey, jobInfo); - 
oidsAggDist.push_back(ti.oid); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(ti.scale); - precisionAggDist.push_back(ti.precision); - typeAggDist.push_back(ti.dtype); - widthAggDist.push_back(ti.width); + returnColMissing = false; + } + else if (aggOp == ROWAGG_CONSTANT) + { + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggDist.push_back(ti.oid); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(ti.scale); + precisionAggDist.push_back(ti.precision); + typeAggDist.push_back(ti.dtype); + widthAggDist.push_back(ti.width); - returnColMissing = false; - } + returnColMissing = false; + } - if (returnColMissing) - { - Message::Args args; - args.add(keyName(outIdx, retKey, jobInfo)); - string emsg = IDBErrorInfo::instance()-> - errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); - cerr << "prep2PhasesDistinctAggregate: " << emsg << " oid=" - << (int) jobInfo.keyInfo->tupleKeyVec[retKey].fId << ", alias=" - << jobInfo.keyInfo->tupleKeyVec[retKey].fTable << ", view=" - << jobInfo.keyInfo->tupleKeyVec[retKey].fView << ", function=" - << (int) aggOp << endl; - throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION); - } - } //else - } // switch - } + if (returnColMissing) + { + Message::Args args; + args.add(keyName(outIdx, retKey, jobInfo)); + string emsg = IDBErrorInfo::instance()-> + errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); + cerr << "prep2PhasesDistinctAggregate: " << emsg << " oid=" + << (int) jobInfo.keyInfo->tupleKeyVec[retKey].fId << ", alias=" + << jobInfo.keyInfo->tupleKeyVec[retKey].fTable << ", view=" + << jobInfo.keyInfo->tupleKeyVec[retKey].fView << ", function=" + << (int) aggOp << endl; + throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION); + } + } //else not a direct hit + } // else not a DISTINCT // update groupby vector if the groupby column is a returned column if (returnedColVec[i].second == 0) @@ -4757,7 +4656,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( { // update the aggregate function vector SP_ROWAGG_FUNC_t funct; - if 
(aggOp == ROWAGG_UDAF) { funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colUm, outIdx)); @@ -4801,7 +4699,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( else if (returnedColVec[i].second == AggregateColumn::DISTINCT_AVG) avgDistFuncMap.insert(make_pair(returnedColVec[i].first, funct)); } - ++outIdx; } // for (i @@ -5044,7 +4941,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( ++multiParms; continue; } - if (returnedColVec[k].first != distinctColKey) continue; @@ -5066,7 +4962,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, groupBySub.size() - 1, f->fOutputColumnIndex, - f->fAuxColumnIndex - multiParms)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } @@ -5092,7 +4988,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( ++multiParms; continue; } - // search non-distinct functions in functionVec vector::iterator it = functionVecUm.begin(); @@ -5110,7 +5005,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( udafFuncCol->fUDAFContext, udafFuncCol->fInputColumnIndex, udafFuncCol->fOutputColumnIndex, - udafFuncCol->fAuxColumnIndex - multiParms)); + udafFuncCol->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } else if (f->fAggFunction == ROWAGG_COUNT_ASTERISK || @@ -5131,7 +5026,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, - f->fAuxColumnIndex - multiParms)); + f->fAuxColumnIndex-multiParms)); functionSub2.push_back(funct); } } diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index ad4460977..c4d7fa574 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp 
avg_mode.cpp avgx.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index 9e4596440..2a93cfad3 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -31,14 +31,21 @@ using namespace mcsv1sdk; * This is a temporary kludge until we get the library loader * task complete */ -UDAF_MAP UDAFMap::fm; #include "allnull.h" #include "ssq.h" +#include "median.h" #include "avg_mode.h" -#include "regr_avgx.h" #include "avgx.h" + +UDAF_MAP& UDAFMap::fm() +{ + static UDAF_MAP* m = new UDAF_MAP; + return *m; +} + UDAF_MAP& UDAFMap::getMap() { + UDAF_MAP& fm = UDAFMap::fm(); if (fm.size() > 0) { return fm; @@ -51,8 +58,8 @@ UDAF_MAP& UDAFMap::getMap() // the function names passed to the interface is always in lower case. fm["allnull"] = new allnull(); fm["ssq"] = new ssq(); + fm["median"] = new median(); fm["avg_mode"] = new avg_mode(); - fm["regr_avgx"] = new regr_avgx(); fm["avgx"] = new avgx(); return fm; diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h index e09228d77..28db6808b 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -108,7 +108,7 @@ public: static EXPORT UDAF_MAP& getMap(); private: - static UDAF_MAP fm; + static UDAF_MAP& fm(); }; /** diff --git a/utils/udfsdk/udfmysql.cpp b/utils/udfsdk/udfmysql.cpp index 1c0fee1db..60da18a43 100644 --- a/utils/udfsdk/udfmysql.cpp +++ b/utils/udfsdk/udfmysql.cpp @@ -349,6 +349,78 @@ extern "C" return data->sumsq; } +//======================================================================= + + /** + * MEDIAN connector stub + */ +#ifdef _MSC_VER + __declspec(dllexport) +#endif + my_bool median_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + if (args->arg_count != 1) + { + strcpy(message, "median() requires one argument"); + return 1; + } + + /* + if (!(data = (struct ssq_data*) malloc(sizeof(struct ssq_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumsq = 
0; + + initid->ptr = (char*)data; + */ + return 0; + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + void median_deinit(UDF_INIT* initid) + { +// free(initid->ptr); + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + void + median_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { +// struct ssq_data* data = (struct ssq_data*)initid->ptr; +// data->sumsq = 0; + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + void + median_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { +// struct ssq_data* data = (struct ssq_data*)initid->ptr; +// double val = cvtArgToDouble(args->arg_type[0], args->args[0]); +// data->sumsq = val*val; + } + +#ifdef _MSC_VER + __declspec(dllexport) +#endif + long long median(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { +// struct ssq_data* data = (struct ssq_data*)initid->ptr; +// return data->sumsq; + return 0; + } + /** * avg_mode connector stub */ @@ -422,167 +494,83 @@ extern "C" //======================================================================= /** - * regr_avgx connector stub - */ - struct regr_avgx_data - { - double sumx; - int64_t cnt; - }; - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) - { - struct regr_avgx_data* data; - - if (args->arg_count != 2) - { - strcpy(message, "regr_avgx() requires two arguments"); - return 1; - } - - if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) - { - strmov(message, "Couldn't allocate memory"); - return 1; - } - - data->sumx = 0; - data->cnt = 0; - - initid->ptr = (char*)data; - return 0; - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - void regr_avgx_deinit(UDF_INIT* initid) - { - free(initid->ptr); - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - void - regr_avgx_clear(UDF_INIT* 
initid, char* is_null __attribute__((unused)), - char* message __attribute__((unused))) - { - struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; - data->sumx = 0; - data->cnt = 0; - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - void - regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args, - char* is_null, - char* message __attribute__((unused))) - { - // TODO test for NULL in x and y - struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; - double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); - ++data->cnt; - data->sumx += xval; - } - -#ifdef _MSC_VER - __declspec(dllexport) -#endif - long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), - char* is_null, char* error __attribute__((unused))) - { - struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; - return data->sumx / data->cnt; - } - -//======================================================================= - - /** - * avgx connector stub. Exactly the same functionality as the - * built in avg() function. Use to test the performance of the - * API + * avgx connector stub. Exactly the same functionality as the + * built in avg() function. 
Use to test the performance of the + * API */ struct avgx_data { - double sumx; - int64_t cnt; + double sumx; + int64_t cnt; }; - -#ifdef _MSC_VER + + #ifdef _MSC_VER __declspec(dllexport) -#endif + #endif my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) { - struct avgx_data* data; + struct avgx_data* data; + if (args->arg_count != 1) + { + strcpy(message,"avgx() requires one argument"); + return 1; + } - if (args->arg_count != 1) - { - strcpy(message, "avgx() requires one argument"); - return 1; - } - - if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) - { - strmov(message, "Couldn't allocate memory"); - return 1; - } - - data->sumx = 0; + if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumx = 0; data->cnt = 0; - initid->ptr = (char*)data; - return 0; + initid->ptr = (char*)data; + return 0; } -#ifdef _MSC_VER + #ifdef _MSC_VER __declspec(dllexport) -#endif + #endif void avgx_deinit(UDF_INIT* initid) { - free(initid->ptr); - } + free(initid->ptr); + } -#ifdef _MSC_VER + #ifdef _MSC_VER __declspec(dllexport) -#endif + #endif void avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), - char* message __attribute__((unused))) + char* message __attribute__((unused))) { - struct avgx_data* data = (struct avgx_data*)initid->ptr; - data->sumx = 0; + struct avgx_data* data = (struct avgx_data*)initid->ptr; + data->sumx = 0; data->cnt = 0; } -#ifdef _MSC_VER + #ifdef _MSC_VER __declspec(dllexport) -#endif + #endif void avgx_add(UDF_INIT* initid, UDF_ARGS* args, - char* is_null, - char* message __attribute__((unused))) + char* is_null, + char* message __attribute__((unused))) { // TODO test for NULL in x and y - struct avgx_data* data = (struct avgx_data*)initid->ptr; - double xval = cvtArgToDouble(args->arg_type[1], args->args[0]); + struct avgx_data* data = (struct avgx_data*)initid->ptr; + double xval = 
cvtArgToDouble(args->arg_type[1], args->args[0]); ++data->cnt; - data->sumx += xval; + data->sumx += xval; } -#ifdef _MSC_VER + #ifdef _MSC_VER __declspec(dllexport) -#endif + #endif long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), - char* is_null, char* error __attribute__((unused))) + char* is_null, char* error __attribute__((unused))) { - struct avgx_data* data = (struct avgx_data*)initid->ptr; - return data->sumx / data->cnt; + struct avgx_data* data = (struct avgx_data*)initid->ptr; + return data->sumx / data->cnt; } } // vim:ts=4 sw=4: diff --git a/utils/udfsdk/udfsdk.vpj b/utils/udfsdk/udfsdk.vpj index fe1f3fd0e..1096f8431 100755 --- a/utils/udfsdk/udfsdk.vpj +++ b/utils/udfsdk/udfsdk.vpj @@ -207,7 +207,6 @@ - @@ -220,7 +219,6 @@ - From 94455246a10dfa25c41caa6daeffdc66c25e4bbd Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 16 Aug 2018 11:56:48 -0500 Subject: [PATCH 02/32] MCOL-521 Move regr functions to their own directrory --- utils/CMakeLists.txt | 2 + utils/regr/regr.vpj | 220 ++++++++++++++++ utils/{udfsdk => regr}/regr_avgx.cpp | 15 ++ utils/{udfsdk => regr}/regr_avgx.h | 2 +- utils/regr/regr_avgy.cpp | 281 ++++++++++++++++++++ utils/regr/regr_avgy.h | 88 +++++++ utils/regr/regr_count.cpp | 131 ++++++++++ utils/regr/regr_count.h | 88 +++++++ utils/regr/regrmysql.cpp | 374 +++++++++++++++++++++++++++ utils/utils.vpj | 4 +- 10 files changed, 1203 insertions(+), 2 deletions(-) create mode 100644 utils/regr/regr.vpj rename utils/{udfsdk => regr}/regr_avgx.cpp (95%) rename utils/{udfsdk => regr}/regr_avgx.h (99%) create mode 100644 utils/regr/regr_avgy.cpp create mode 100644 utils/regr/regr_avgy.h create mode 100644 utils/regr/regr_count.cpp create mode 100644 utils/regr/regr_count.h create mode 100644 utils/regr/regrmysql.cpp diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index dba39fe86..c4486ddd5 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -25,3 +25,5 @@ add_subdirectory(thrift) 
add_subdirectory(querytele) add_subdirectory(clusterTester) add_subdirectory(libmysql_client) +add_subdirectory(regr) + diff --git a/utils/regr/regr.vpj b/utils/regr/regr.vpj new file mode 100644 index 000000000..d99a1d436 --- /dev/null +++ b/utils/regr/regr.vpj @@ -0,0 +1,220 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/utils/udfsdk/regr_avgx.cpp b/utils/regr/regr_avgx.cpp similarity index 95% rename from utils/udfsdk/regr_avgx.cpp rename to utils/regr/regr_avgx.cpp index e99871f97..1230df84c 100644 --- a/utils/udfsdk/regr_avgx.cpp +++ b/utils/regr/regr_avgx.cpp @@ -24,6 +24,17 @@ using namespace mcsv1sdk; +class Add_regr_avgx_ToUDAFMap +{ +public: + Add_regr_avgx_ToUDAFMap() + { + UDAFMap::getMap()["regr_avgx"] = new regr_avgx(); + } +}; + +static Add_regr_avgx_ToUDAFMap addToMap; + #define DATATYPE double // Use the simple data model @@ -196,6 +207,10 @@ mcsv1_UDAF::ReturnCode regr_avgx::dropValue(mcsv1Context* context, ColumnDatum* struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; DATATYPE val = 0.0; + if (context->isParamNull(0) || context->isParamNull(1)) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } if (valIn_x.empty() || valIn_y.empty()) { return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
diff --git a/utils/udfsdk/regr_avgx.h b/utils/regr/regr_avgx.h similarity index 99% rename from utils/udfsdk/regr_avgx.h rename to utils/regr/regr_avgx.h index 27b8708f7..75791f769 100644 --- a/utils/udfsdk/regr_avgx.h +++ b/utils/regr/regr_avgx.h @@ -26,7 +26,7 @@ * * * CREATE AGGREGATE FUNCTION regr_avgx returns REAL soname - * 'libudf_mysql.so'; + * 'libregr_mysql.so'; * */ #ifndef HEADER_regr_avgx diff --git a/utils/regr/regr_avgy.cpp b/utils/regr/regr_avgy.cpp new file mode 100644 index 000000000..667019a33 --- /dev/null +++ b/utils/regr/regr_avgy.cpp @@ -0,0 +1,281 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include +#include +#include +#include "regr_avgy.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +class Add_regr_avgy_ToUDAFMap +{ +public: + Add_regr_avgy_ToUDAFMap() + { + UDAFMap::getMap()["regr_avgy"] = new regr_avgy(); + } +}; + +static Add_regr_avgy_ToUDAFMap addToMap; + +#define DATATYPE double + +// Use the simple data model +struct regr_avgy_data +{ + double sum; + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode regr_avgy::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 2) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgy() with other than 2 arguments"); + return mcsv1_UDAF::ERROR; + } + + if (!(isNumeric(colTypes[0].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgy() with a non-numeric x argument"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(regr_avgy_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + context->setScale(colTypes[0].scale + 4); + context->setPrecision(19); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode regr_avgy::reset(mcsv1Context* context) +{ + struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data; + data->sum = 0; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgy::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_y = valsIn[0].columnData; + static_any::any& valIn_x = valsIn[1].columnData; + struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (context->isParamNull(0) || context->isParamNull(1)) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when 
UDAF_IGNORE_NULLS is on. + } + if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. Probably redundant + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + + if (valIn_y.compatible(longTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(charTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(scharTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(shortTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(intTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(llTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(ucharTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(ushortTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(uintTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(ulongTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(ullTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(floatTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(doubleTypeId)) + { + val = valIn_y.cast(); + } + + // For decimal types, we need to move the decimal point. 
+ uint32_t scale = valsIn[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum += val; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgy::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct regr_avgy_data* outData = (struct regr_avgy_data*)context->getUserData()->data; + struct regr_avgy_data* inData = (struct regr_avgy_data*)userDataIn->data; + + outData->sum += inData->sum; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgy::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data; + + if (data->cnt == 0) + { + valOut = 0; + } + else + { + valOut = data->sum / (double)data->cnt; + } + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_avgy::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_y = valsDropped[0].columnData; + static_any::any& valIn_x = valsDropped[1].columnData; + struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data; + DATATYPE val = 0.0; + + if (context->isParamNull(0) || context->isParamNull(1)) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + + if (valIn_y.compatible(charTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(scharTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(shortTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(intTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(longTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(llTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(ucharTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(ushortTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(uintTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(ulongTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(ullTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(floatTypeId)) + { + val = valIn_y.cast(); + } + else if (valIn_y.compatible(doubleTypeId)) + { + val = valIn_y.cast(); + } + + // For decimal types, we need to move the decimal point. + uint32_t scale = valsDropped[0].scale; + + if (val != 0 && scale > 0) + { + val /= pow(10.0, (double)scale); + } + + data->sum -= val; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/regr/regr_avgy.h b/utils/regr/regr_avgy.h new file mode 100644 index 000000000..c99021f9f --- /dev/null +++ b/utils/regr/regr_avgy.h @@ -0,0 +1,88 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* regr_avgy.h +***********************************************************************/ + +/** + * Columnstore interface for for the regr_avgy function + * + * + * CREATE AGGREGATE FUNCTION regr_avgy returns REAL soname + * 'libregr_mysql.so'; + * + */ +#ifndef HEADER_regr_avgy +#define HEADER_regr_avgy + +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Return the regr_avgy value of the dataset + +class regr_avgy : public mcsv1_UDAF +{ +public: + // Defaults OK + regr_avgy() : mcsv1_UDAF() {}; + virtual ~regr_avgy() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_regr_avgy.h + diff --git a/utils/regr/regr_count.cpp b/utils/regr/regr_count.cpp new file mode 100644 index 000000000..c65a1f4a6 --- /dev/null +++ b/utils/regr/regr_count.cpp @@ -0,0 +1,131 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the 
terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include +#include "regr_count.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +class Add_regr_count_ToUDAFMap +{ +public: + Add_regr_count_ToUDAFMap() + { + UDAFMap::getMap()["regr_count"] = new regr_count(); + } +}; + +static Add_regr_count_ToUDAFMap addToMap; + +// Use the simple data model +struct regr_count_data +{ + uint64_t cnt; +}; + + +mcsv1_UDAF::ReturnCode regr_count::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 2) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_count() with other than 2 arguments"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(regr_count_data)); + context->setResultType(CalpontSystemCatalog::BIGINT); + context->setColWidth(8); + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode regr_count::reset(mcsv1Context* context) +{ + struct regr_count_data* data = (struct regr_count_data*)context->getUserData()->data; + data->cnt = 0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_count::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_y = valsIn[0].columnData; + static_any::any& valIn_x = valsIn[1].columnData; + struct regr_count_data* data = (struct 
regr_count_data*)context->getUserData()->data; + + if (context->isParamNull(0) || context->isParamNull(1)) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. Probably redundant + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_count::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct regr_count_data* outData = (struct regr_count_data*)context->getUserData()->data; + struct regr_count_data* inData = (struct regr_count_data*)userDataIn->data; + + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_count::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct regr_count_data* data = (struct regr_count_data*)context->getUserData()->data; + + valOut = data->cnt; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_count::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_y = valsDropped[0].columnData; + static_any::any& valIn_x = valsDropped[1].columnData; + struct regr_count_data* data = (struct regr_count_data*)context->getUserData()->data; + + if (context->isParamNull(0) || context->isParamNull(1)) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. + } + if (valIn_x.empty() || valIn_y.empty()) + { + return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. 
+ } + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/regr/regr_count.h b/utils/regr/regr_count.h new file mode 100644 index 000000000..4f4fc558e --- /dev/null +++ b/utils/regr/regr_count.h @@ -0,0 +1,88 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/*********************************************************************** +* $Id$ +* +* regr_count.h +***********************************************************************/ + +/** + * Columnstore interface for for the regr_count function + * + * + * CREATE AGGREGATE FUNCTION regr_count returns INTEGER + * soname 'libregr_mysql.so'; + * + */ +#ifndef HEADER_regr_count +#define HEADER_regr_count + +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Return the regr_count value of the dataset + +class regr_count : public mcsv1_UDAF +{ +public: + // Defaults OK + regr_count() : mcsv1_UDAF() {}; + virtual ~regr_count() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* 
context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_regr_count.h + diff --git a/utils/regr/regrmysql.cpp b/utils/regr/regrmysql.cpp new file mode 100644 index 000000000..6870f3050 --- /dev/null +++ b/utils/regr/regrmysql.cpp @@ -0,0 +1,374 @@ +#include +#include +#include +#include +using namespace std; + +#include "idb_mysql.h" + +namespace +{ +inline double cvtArgToDouble(int t, const char* v) +{ + double d = 0.0; + + switch (t) + { + case INT_RESULT: + d = (double)(*((long long*)v)); + break; + + case REAL_RESULT: + d = *((double*)v); + break; + + case DECIMAL_RESULT: + case STRING_RESULT: + d = strtod(v, 0); + break; + + case ROW_RESULT: + break; + } + + return d; +} +inline long long cvtArgToInt(int t, const char* v) +{ + long long ll = 0; + + switch (t) + { + case INT_RESULT: + ll = *((long long*)v); + break; + + case REAL_RESULT: + ll = (long long)(*((double*)v)); + break; + + case DECIMAL_RESULT: + case STRING_RESULT: + ll = strtoll(v, 0, 0); + break; + + case ROW_RESULT: + break; + } + + return ll; +} +inline string cvtArgToString(int t, const char* v) +{ + string str; + + switch (t) + { + case INT_RESULT: + { + long long ll; + ll = *((long long*)v); + ostringstream oss; + oss << ll; + str = oss.str(); + break; + } + + case REAL_RESULT: + { + double d; + d = *((double*)v); + ostringstream oss; + oss << d; + str = oss.str(); + break; + } + + case DECIMAL_RESULT: + case STRING_RESULT: + str = v; + break; + + case ROW_RESULT: + break; + } + + return str; +} +} + +/**************************************************************************** + * UDF function interface for MariaDB connector to recognize is 
defined in + * this section. MariaDB's UDF function creation guideline needs to be followed. + * + * Three interface need to be defined on the connector for each UDF function. + * + * XXX_init: To allocate the necessary memory for the UDF function and validate + * the input. + * XXX_deinit: To clean up the memory. + * XXX: The function implementation. + * Detailed instruction can be found at MariaDB source directory: + * ~/sql/udf_example.cc. + * + * Please note that the implementation of the function defined on the connector + * will only be called when all the input arguments are constant. e.g., + * mcs_add(2,3). That way, the function does not run in a distributed fashion + * and could be slow. If there is a need for the UDF function to run with + * pure constant input, then one needs to put a implementation in the XXX + * body, which is very similar to the ones in getXXXval API. If there's no + * such need for a given UDF, then the XXX interface can just return a dummy + * result because this function will never be called. 
+ */ +extern "C" +{ + +//======================================================================= + + /** + * regr_avgx connector stub + */ + struct regr_avgx_data + { + double sumx; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_avgx_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_avgx() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumx = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_avgx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + data->sumx = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + ++data->cnt; + data->sumx += xval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; + return data->sumx / data->cnt; + } + +//======================================================================= + + /** + * regr_avgy connector stub + */ + struct regr_avgy_data + { + 
double sumy; + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_avgy_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_avgy_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_avgy() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_avgy_data*) malloc(sizeof(struct regr_avgy_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->sumy = 0; + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_avgy_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgy_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_avgy_data* data = (struct regr_avgy_data*)initid->ptr; + data->sumy = 0; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_avgy_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct regr_avgy_data* data = (struct regr_avgy_data*)initid->ptr; + double yval = cvtArgToDouble(args->arg_type[0], args->args[0]); + ++data->cnt; + data->sumy += yval; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long regr_avgy(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_avgy_data* data = (struct regr_avgy_data*)initid->ptr; + return data->sumy / data->cnt; + } + +//======================================================================= + + /** + * regr_count connector stub + */ + struct regr_count_data + { + int64_t cnt; + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_count_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct 
regr_count_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_count() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_count_data*) malloc(sizeof(struct regr_count_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->cnt = 0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_count_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_count_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_count_data* data = (struct regr_count_data*)initid->ptr; + data->cnt = 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_count_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct regr_count_data* data = (struct regr_count_data*)initid->ptr; + ++data->cnt; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + long long regr_count(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_count_data* data = (struct regr_count_data*)initid->ptr; + return data->cnt; + } +//======================================================================= +} +// vim:ts=4 sw=4: + diff --git a/utils/utils.vpj b/utils/utils.vpj index 53da962f3..d81586008 100755 --- a/utils/utils.vpj +++ b/utils/utils.vpj @@ -292,6 +292,8 @@ Filters="*.bmp"/> + Filters=""> + + From f4af014435f90b129aba21caa8064a18362184fa Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 16 Aug 2018 17:10:52 -0500 Subject: [PATCH 03/32] MCOL-521 Put regr functions in their own library --- CMakeLists.txt | 2 +- dbcon/joblist/tupleaggregatestep.cpp | 13 +++++++------ oam/install_scripts/post-mysql-install | 3 +++ 
oam/install_scripts/post-mysqld-install | 3 +++ 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 30eb38f6c..e5e96dbeb 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -155,7 +155,7 @@ SET (ENGINE_TOOLSDIR "${INSTALL_ENGINE}/tools") SET (ENGINE_COMMON_LIBS messageqcpp loggingcpp configcpp idbboot ${Boost_LIBRARIES} xml2 pthread rt libmysql_client) SET (ENGINE_OAM_LIBS oamcpp alarmmanager) SET (ENGINE_BRM_LIBS brm idbdatafile cacheutils rwlock ${ENGINE_OAM_LIBS} ${ENGINE_COMMON_LIBS}) -SET (ENGINE_EXEC_LIBS joblist execplan windowfunction joiner rowgroup funcexp udfsdk dataconvert common compress querystats querytele thrift threadpool ${ENGINE_BRM_LIBS}) +SET (ENGINE_EXEC_LIBS joblist execplan windowfunction joiner rowgroup funcexp udfsdk regr dataconvert common compress querystats querytele thrift threadpool ${ENGINE_BRM_LIBS}) SET (ENGINE_WRITE_LIBS ddlpackageproc ddlpackage dmlpackageproc dmlpackage writeengine writeengineclient idbdatafile cacheutils ${ENGINE_EXEC_LIBS}) SET (ENGINE_COMMON_LDFLAGS "") diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 429d5821d..97a577f4f 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -3876,6 +3876,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // column index for PM aggregate rowgroup uint64_t colAggPm = 0; + uint64_t multiParm = 0; // for groupby column for (uint64_t i = 0; i < jobInfo.groupByColVec.size(); i++) @@ -4027,7 +4028,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( continue; functionVecPm.push_back(funct); - aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc), colAggPm)); + aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc), colAggPm-multiParm)); switch (aggOp) { @@ -4247,7 +4248,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(typeProj[colProj]); 
widthAggPm.push_back(width[colProj]); multiParmIndexes.push_back(colAggPm); - colAggPm++; + ++colAggPm; + ++multiParm; // If the param is const if (udafc) { @@ -4414,7 +4416,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second - multiParms; + colUm = it->second; } } @@ -4517,14 +4519,13 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (it != aggFuncMap.end()) { - colUm = it->second - multiParms; + colUm = it->second; oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(keysAggUm[colUm]); scaleAggDist.push_back(scaleAggUm[colUm]); precisionAggDist.push_back(precisionAggUm[colUm]); typeAggDist.push_back(typeAggUm[colUm]); widthAggDist.push_back(widthAggUm[colUm]); - colUm -= multiParms; } // not a direct hit -- a returned column is not already in the RG from PMs @@ -4542,7 +4543,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // false alarm returnColMissing = false; - colUm = it->second - multiParms; + colUm = it->second; if (aggOp == ROWAGG_SUM) { diff --git a/oam/install_scripts/post-mysql-install b/oam/install_scripts/post-mysql-install index 24711b048..421d5b77f 100755 --- a/oam/install_scripts/post-mysql-install +++ b/oam/install_scripts/post-mysql-install @@ -72,9 +72,11 @@ fi if [ -f $installdir/lib/libcalmysql.so.1.0.0 ]; then libcalmysql=$installdir/lib/libcalmysql.so.1.0.0 libudfsdk=$installdir/lib/libudf_mysql.so.1.0.0 + libregrsdk=$installdir/lib/libregr_mysql.so.1.0.0 elif [ -f $installdir/lib/libcalmysql.so.1 ]; then libcalmysql=$installdir/lib/libcalmysql.so.1 libudfsdk=$installdir/lib/libudf_mysql.so.1 + libregrsdk=$installdir/lib/libregr_mysql.so.1 else libcalmysql= fi @@ -84,6 +86,7 @@ if [ -d $installdir/mysql/lib64/mysql/plugin -a -n "$libcalmysql" ]; then ln -sf $libcalmysql libcalmysql.so ln -sf $libcalmysql libcalmysqlent.so ln -sf $libudfsdk libudf_mysql.so + ln -sf $libregrsdk libregr_mysql.so fi if [ $installdir != "/usr/local/mariadb/columnstore" ]; 
then diff --git a/oam/install_scripts/post-mysqld-install b/oam/install_scripts/post-mysqld-install index 58f2b3d65..e712b2813 100755 --- a/oam/install_scripts/post-mysqld-install +++ b/oam/install_scripts/post-mysqld-install @@ -83,6 +83,7 @@ chown -R $user.$user $installdir/mysql if [ -f $installdir/lib/libcalmysql.so.1.0.0 ]; then libcalmysql=$installdir/lib/libcalmysql.so.1.0.0 libudfsdk=$installdir/lib/libudf_mysql.so.1.0.0 + libregrsdk=$installdir/lib/libregr_mysql.so.1.0.0 is_columnstore_tables=$installdir/lib/is_columnstore_tables.so.1.0.0 is_columnstore_columns=$installdir/lib/is_columnstore_columns.so.1.0.0 is_columnstore_extents=$installdir/lib/is_columnstore_extents.so.1.0.0 @@ -90,6 +91,7 @@ if [ -f $installdir/lib/libcalmysql.so.1.0.0 ]; then elif [ -f $installdir/lib/libcalmysql.so.1 ]; then libcalmysql=$installdir/lib/libcalmysql.so.1 libudfsdk=$installdir/lib/libudf_mysql.so.1 + libregrsdk=$installdir/lib/libregr_mysql.so.1 is_columnstore_tables=$installdir/lib/is_columnstore_tables.so.1 is_columnstore_columns=$installdir/lib/is_columnstore_columns.so.1 is_columnstore_extents=$installdir/lib/is_columnstore_extents.so.1 @@ -104,6 +106,7 @@ if [ -n "$libcalmysql" ]; then ln -sf $libcalmysql libcalmysql.so ln -sf $libcalmysql libcalmysqlent.so ln -sf $libudfsdk libudf_mysql.so + ln -sf $libregrsdk libregr_mysql.so ln -sf $is_columnstore_tables is_columnstore_tables.so ln -sf $is_columnstore_columns is_columnstore_columns.so ln -sf $is_columnstore_extents is_columnstore_extents.so From 07bd4130530b228b8ef8d3fa575f1d0c3e375ec4 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Fri, 17 Aug 2018 22:27:02 +0300 Subject: [PATCH 04/32] MCOL-1660/1659 Table/column identifiers support spaces in DDL. MCOL-1660/1659 Table/column identifiers support spaces in DDL. 
--- dbcon/ddlpackage/ddl.l | 6 +++--- dbcon/ddlpackage/ddl.y | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index 7f9362cee..926a836e2 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -72,9 +72,9 @@ ident_start [A-Za-z\200-\377_] ident_cont [A-Za-z\200-\377_0-9\$] identifier {ident_start}{ident_cont}* /* fully qualified names regexes */ -fq_identifier {identifier}\.{identifier} -identifier_quoted {grave_accent}{identifier}{grave_accent} -identifier_double_quoted {double_quote}{identifier}{double_quote} +ident_w_spaces {identifier}\x20* +identifier_quoted {grave_accent}{ident_w_spaces}+{grave_accent} +identifier_double_quoted {double_quote}{ident_w_spaces}+{double_quote} integer [-+]?{digit}+ decimal ([-+]?({digit}*\.{digit}+)|({digit}+\.{digit}*)) diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index 2556b8340..c9fc805ed 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -604,7 +604,7 @@ table_name: ; qualified_name: - | ident { + ident { if (x->fDBSchema.size()) $$ = new QualifiedName((char*)x->fDBSchema.c_str(), $1); else From 4572c25534f6fbbbd877e1dcd460cfc6ef75d996 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Tue, 28 Aug 2018 11:29:38 +0300 Subject: [PATCH 05/32] MCOL-1675 When insert record calculate HWM using a column with the smallest width instead of the first column in the same way as in MCOL-984. 
--- writeengine/wrapper/writeengine.cpp | 111 +++++++++++++++++++--------- writeengine/wrapper/writeengine.h | 5 ++ 2 files changed, 82 insertions(+), 34 deletions(-) diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index afea06fee..d163d1f42 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -182,6 +182,37 @@ int WriteEngineWrapper::checkValid(const TxnID& txnid, const ColStructList& colS return NO_ERROR; } +/*@brief findSmallestColumn --Find the smallest column for this table + */ +/*********************************************************** + * DESCRIPTION: + * Find the smallest column for this table + * PARAMETERS: + * lowColLen - returns smallest column width + * colId - returns smallest column id + * colStructList - column struct list + * RETURN: + * void + ***********************************************************/ +void WriteEngineWrapper::findSmallestColumn(uint32_t& colId, ColStructList colStructList) +// MCOL-1675: find the smallest column width to calculate the RowID from so +// that all HWMs will be incremented by this operation +{ + int32_t lowColLen = 8192; + for (uint32_t colIt = 0; colIt < colStructList.size(); colIt++) + { + if (colStructList[colIt].colWidth < lowColLen) + { + colId = colIt; + lowColLen = colStructList[colId].colWidth; + if ( lowColLen == 1 ) + { + break; + } + } + } +} + /*@convertValArray - Convert interface values to internal values */ /*********************************************************** @@ -847,6 +878,11 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, for (i = 0; i < colStructList.size(); i++) Convertor::convertColType(&colStructList[i]); + uint32_t colId = 0; + // MCOL-1675: find the smallest column width to calculate the RowID from so + // that all HWMs will be incremented by this operation + findSmallestColumn(colId, colStructList); + // rc = checkValid(txnid, colStructList, colValueList, ridList); // if (rc != 
NO_ERROR) // return rc; @@ -873,8 +909,8 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, //-------------------------------------------------------------------------- if (isFirstBatchPm) { - currentDBrootIdx = dbRootExtentTrackers[0]->getCurrentDBRootIdx(); - extentInfo = dbRootExtentTrackers[0]->getDBRootExtentList(); + currentDBrootIdx = dbRootExtentTrackers[colId]->getCurrentDBRootIdx(); + extentInfo = dbRootExtentTrackers[colId]->getDBRootExtentList(); dbRoot = extentInfo[currentDBrootIdx].fDbRoot; partitionNum = extentInfo[currentDBrootIdx].fPartition; @@ -914,7 +950,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, { colOp = m_colOp[op(colStructList[i].fCompressionType)]; colOp->initColumn(curCol); - colOp->setColParam(curCol, 0, colStructList[i].colWidth, colStructList[i].colDataType, + colOp->setColParam(curCol, colId, colStructList[i].colWidth, colStructList[i].colDataType, colStructList[i].colType, colStructList[i].dataOid, colStructList[i].fCompressionType, dbRoot, partitionNum, segmentNum); rc = colOp->extendColumn(curCol, false, extents[i].startBlkOffset, extents[i].startLbid, extents[i].allocSize, dbRoot, @@ -1040,7 +1076,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, } // if (isFirstBatchPm) else //get the extent info from tableMetaData { - ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[0].dataOid); + ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[colId].dataOid); ColExtsInfo::iterator it = aColExtsInfo.begin(); while (it != aColExtsInfo.end()) { @@ -1073,7 +1109,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, //-------------------------------------------------------------------------- // allocate row id(s) //-------------------------------------------------------------------------- - curColStruct = colStructList[0]; + curColStruct = colStructList[colId]; colOp = m_colOp[op(curColStruct.fCompressionType)]; colOp->initColumn(curCol); @@ 
-1084,23 +1120,27 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, vector fileInfo; dbRoot = curColStruct.fColDbRoot; //use the first column to calculate row id - ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[0].dataOid); + ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[colId].dataOid); ColExtsInfo::iterator it = aColExtsInfo.begin(); while (it != aColExtsInfo.end()) { - if ((it->dbRoot == colStructList[0].fColDbRoot) && (it->partNum == colStructList[0].fColPartition) && (it->segNum == colStructList[0].fColSegment) && it->current ) + if ((it->dbRoot == colStructList[colId].fColDbRoot) && + (it->partNum == colStructList[colId].fColPartition) && + (it->segNum == colStructList[colId].fColSegment) && it->current ) + { break; + } it++; } if (it != aColExtsInfo.end()) { hwm = it->hwm; - //cout << "Got from colextinfo hwm for oid " << colStructList[0].dataOid << " is " << hwm << " and seg is " << colStructList[0].fColSegment << endl; + //cout << "Got from colextinfo hwm for oid " << colStructList[colId].dataOid << " is " << hwm << " and seg is " << colStructList[0].fColSegment << endl; } oldHwm = hwm; //Save this info for rollback //need to pass real dbRoot, partition, and segment to setColParam - colOp->setColParam(curCol, 0, curColStruct.colWidth, curColStruct.colDataType, + colOp->setColParam(curCol, colId, curColStruct.colWidth, curColStruct.colDataType, curColStruct.colType, curColStruct.dataOid, curColStruct.fCompressionType, curColStruct.fColDbRoot, curColStruct.fColPartition, curColStruct.fColSegment); rc = colOp->openColumnFile(curCol, segFile, useTmpSuffix); // @bug 5572 HDFS tmp file @@ -1123,13 +1163,13 @@ timer.start("allocRowId"); if (idbdatafile::IDBPolicy::useHdfs()) insertSelect = true; - rc = colOp->allocRowId(txnid, bUseStartExtent, + rc = colOp->allocRowId(txnid, bUseStartExtent, curCol, (uint64_t)totalRow, rowIdArray, hwm, newExtent, rowsLeft, newHwm, newFile, newColStructList, 
newDctnryStructList, dbRootExtentTrackers, insertSelect, true, tableOid, isFirstBatchPm); - //cout << "after allocrowid, total row = " < 256K. // if totalRow == rowsLeft, then not adding rows to 1st extent, so skip it. //-------------------------------------------------------------------------- -// DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? + // DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? if ((curCol.dataFile.fPartition == 0) && (curCol.dataFile.fSegment == 0) && ((totalRow-rowsLeft) > 0) && (rowIdArray[totalRow-rowsLeft-1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k=1; ksetColParam(expandCol, 0, @@ -1505,18 +1548,10 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, for (i = 0; i < colStructList.size(); i++) Convertor::convertColType(&colStructList[i]); - // MCOL-984: find the smallest column width to calculate the RowID from so - // that all HWMs will be incremented by this operation - int32_t lowColLen = 8192; - int32_t colId = 0; - for (uint32_t colIt = 0; colIt < colStructList.size(); colIt++) - { - if (colStructList[colIt].colWidth < lowColLen) - { - colId = colIt; - lowColLen = colStructList[colId].colWidth; - } - } + uint32_t colId = 0; + // MCOL-1675: find the smallest column width to calculate the RowID from so + // that all HWMs will be incremented by this operation + findSmallestColumn(colId, colStructList); // rc = checkValid(txnid, colStructList, colValueList, ridList); // if (rc != NO_ERROR) @@ -1809,7 +1844,7 @@ timer.stop("allocRowId"); // Expand initial abbreviated extent if any RID in 1st extent is > 256K. // if totalRow == rowsLeft, then not adding rows to 1st extent, so skip it. //-------------------------------------------------------------------------- -// DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? + // DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? 
if ((curCol.dataFile.fPartition == 0) && (curCol.dataFile.fSegment == 0) && ((totalRow-rowsLeft) > 0) && @@ -1821,7 +1856,8 @@ timer.stop("allocRowId"); if (k == colId) continue; Column expandCol; - colOp = m_colOp[op(colStructList[k].fCompressionType)]; + colOp = m_colOp[op(colStructList[k].fCompressionType)]; + // Shouldn't we change 0 to colId here? colOp->setColParam(expandCol, 0, colStructList[k].colWidth, colStructList[k].colDataType, @@ -2782,6 +2818,11 @@ StopWatch timer; for (i = 0; i < colStructList.size(); i++) Convertor::convertColType(&colStructList[i]); + uint32_t colId = 0; + // MCOL-1675: find the smallest column width to calculate the RowID from so + // that all HWMs will be incremented by this operation + findSmallestColumn(colId, colStructList); + rc = checkValid(txnid, colStructList, colValueList, ridList); if (rc != NO_ERROR) return rc; @@ -2799,7 +2840,7 @@ StopWatch timer; //-------------------------------------------------------------------------- // allocate row id(s) //-------------------------------------------------------------------------- - curColStruct = colStructList[0]; + curColStruct = colStructList[colId]; colOp = m_colOp[op(curColStruct.fCompressionType)]; colOp->initColumn(curCol); @@ -2834,7 +2875,7 @@ StopWatch timer; oldHwm = hwm; //Save this info for rollback //need to pass real dbRoot, partition, and segment to setColParam - colOp->setColParam(curCol, 0, curColStruct.colWidth, curColStruct.colDataType, + colOp->setColParam(curCol, colId, curColStruct.colWidth, curColStruct.colDataType, curColStruct.colType, curColStruct.dataOid, curColStruct.fCompressionType, dbRoot, partitionNum, segmentNum); @@ -2944,13 +2985,15 @@ timer.stop("allocRowId"); // if totalRow == rowsLeft, then not adding rows to 1st extent, so skip it. //-------------------------------------------------------------------------- // DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? 
- if ((colStructList[0].fColPartition == 0) && - (colStructList[0].fColSegment == 0) && + if ((colStructList[colId].fColPartition == 0) && + (colStructList[colId].fColSegment == 0) && ((totalRow-rowsLeft) > 0) && (rowIdArray[totalRow-rowsLeft-1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k=1; ksetColParam(expandCol, 0, diff --git a/writeengine/wrapper/writeengine.h b/writeengine/wrapper/writeengine.h index 099854f77..93729ed75 100644 --- a/writeengine/wrapper/writeengine.h +++ b/writeengine/wrapper/writeengine.h @@ -607,6 +607,11 @@ private: */ int checkValid(const TxnID& txnid, const ColStructList& colStructList, const ColValueList& colValueList, const RIDList& ridList) const; + /** + * @brief Find the smallest column for this table + */ + void findSmallestColumn(uint32_t &colId, ColStructList colStructList); + /** * @brief Convert interface column type to a internal column type */ From 58bc867ca17b85d2447fe25835787d58bea2c73c Mon Sep 17 00:00:00 2001 From: Ravi Prakash Date: Thu, 30 Aug 2018 20:24:24 -0700 Subject: [PATCH 06/32] MCOL-1188 assertion 'fColumn.get() && fSub && fFunc' failed, ... MySQL server crashed. The problem was in processing a subquery in the where clause that was categorized as a CACHE_ITEM in the parse tree. The fix involved how we walk the parse tree in gp_walk(). 
--- dbcon/mysql/ha_calpont_execplan.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 42d26108c..47f07c67b 100755 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4096,15 +4096,17 @@ void gp_walk(const Item *item, void *arg) if (itype == Item::FUNC_ITEM && ((Item_func*)item)->functype() == Item_func::XOR_FUNC ) itype = Item::COND_ITEM; - if(item->type() == Item::CACHE_ITEM) - { - item = ((Item_cache*)item)->get_example(); - itype = item->type(); - isCached = true; - } - switch (itype) { + case Item::CACHE_ITEM: + { + // The item or condition is cached as per MariaDB server view but + // for InfiniDB it need to be executed. + // MCOL-1188 and + Item* orig_item = ((Item_cache*)item)->get_example(); + orig_item->traverse_cond(gp_walk, gwip, Item::POSTFIX); + break; + } case Item::FIELD_ITEM: { Item_field* ifp = (Item_field*)item; From 5247dfa0825186047c1650f0e626e145615a584e Mon Sep 17 00:00:00 2001 From: Ravi Prakash Date: Tue, 4 Sep 2018 12:20:40 -0700 Subject: [PATCH 07/32] MCOL-1188 assertion 'fColumn.get() && fSub && fFunc' failed,... Some cleanup for the previous check-in. --- dbcon/mysql/ha_calpont_execplan.cpp | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 47f07c67b..2de5dfa56 100755 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -4084,7 +4084,6 @@ void gp_walk(const Item *item, void *arg) { gp_walk_info* gwip = reinterpret_cast(arg); idbassert(gwip); - bool isCached = false; //Bailout... if (gwip->fatalParseError) return; @@ -4101,8 +4100,8 @@ void gp_walk(const Item *item, void *arg) case Item::CACHE_ITEM: { // The item or condition is cached as per MariaDB server view but - // for InfiniDB it need to be executed. 
- // MCOL-1188 and + // for InfiniDB it need to be parsed and executed. + // MCOL-1188 and MCOL-1029 Item* orig_item = ((Item_cache*)item)->get_example(); orig_item->traverse_cond(gp_walk, gwip, Item::POSTFIX); break; @@ -4286,13 +4285,9 @@ void gp_walk(const Item *item, void *arg) cc->resultType(colType_MysqlToIDB(item)); } - // cached item comes in one piece - if (!isCached) + for (uint32_t i = 0; i < ifp->argument_count() && !gwip->rcWorkStack.empty(); i++) { - for (uint32_t i = 0; i < ifp->argument_count() && !gwip->rcWorkStack.empty(); i++) - { - gwip->rcWorkStack.pop(); - } + gwip->rcWorkStack.pop(); } // bug 3137. If filter constant like 1=0, put it to ptWorkStack // MariaDB bug 750. Breaks if compare is an argument to a function. @@ -4360,14 +4355,6 @@ void gp_walk(const Item *item, void *arg) bool isOr = (ftype == Item_func::COND_OR_FUNC); bool isXor = (ftype == Item_func::XOR_FUNC); - // MCOL-1029 A cached COND_ITEM is something like: - // AND (TRUE OR FALSE) - // We can skip it - if (isCached) - { - break; - } - List *argumentList; List xorArgumentList; if (isXor) From e5f18964f06e8624bf27bea088fdee792447c40c Mon Sep 17 00:00:00 2001 From: David Hill Date: Tue, 4 Sep 2018 16:41:44 -0500 Subject: [PATCH 08/32] MCOL-1523 --- oam/install_scripts/columnstoreAlias | 3 + oamapps/mcsadmin/mcsadmin.cpp | 30 +++- oamapps/postConfigure/postConfigure.cpp | 188 +++++++++++------------- procmgr/processmanager.cpp | 149 ++++++++++++------- 4 files changed, 212 insertions(+), 158 deletions(-) diff --git a/oam/install_scripts/columnstoreAlias b/oam/install_scripts/columnstoreAlias index cd225c1a9..255eb7e7e 100644 --- a/oam/install_scripts/columnstoreAlias +++ b/oam/install_scripts/columnstoreAlias @@ -10,5 +10,8 @@ alias core='cd /var/log/mariadb/columnstore/corefiles' alias tmsg='tail -f /var/log/messages' alias tdebug='tail -f /var/log/mariadb/columnstore/debug.log' alias tinfo='tail -f /var/log/mariadb/columnstore/info.log' +alias terror='tail -f 
/var/log/mariadb/columnstore/err.log' +alias twarning='tail -f /var/log/mariadb/columnstore/warning.log' +alias tcrit='tail -f /var/log/mariadb/columnstore/crit.log' alias dbrm='cd /usr/local/mariadb/columnstore/data1/systemFiles/dbrm' alias module='cat /usr/local/mariadb/columnstore/local/module' diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp index 935d081cc..6090058d4 100644 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ b/oamapps/mcsadmin/mcsadmin.cpp @@ -7036,15 +7036,33 @@ int processCommand(string* arguments) if (systemstatus.SystemOpState == oam::ACTIVE ) { try { - cout << endl << " Starting Modules" << endl; - oam.startModule(devicenetworklist, ackTemp); +// cout << endl << " Starting Modules" << endl; +// oam.startModule(devicenetworklist, ackTemp); //reload DBRM with new configuration, needs to be done here after startModule - cmd = startup::StartUp::installDir() + "/bin/dbrmctl reload > /dev/null 2>&1"; - system(cmd.c_str()); - sleep(15); +// cmd = startup::StartUp::installDir() + "/bin/dbrmctl reload > /dev/null 2>&1"; +// system(cmd.c_str()); +// sleep(15); - cout << " Successful start of Modules " << endl; +// cout << " Successful start of Modules " << endl; + + cout << endl << " Restarting System "; + int returnStatus = oam.restartSystem(gracefulTemp, ackTemp); + switch (returnStatus) + { + case API_SUCCESS: + if ( waitForActive() ) + cout << endl << " Successful restart of System " << endl << endl; + else + cout << endl << "**** restartSystem Failed : check log files" << endl; + break; + case API_CANCELLED: + cout << endl << " Restart of System canceled" << endl << endl; + break; + default: + cout << endl << "**** restartSystem Failed : Check system logs" << endl; + break; + } } catch (exception& e) { diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 98227da9d..e69677039 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ 
-1247,26 +1247,16 @@ int main(int argc, char *argv[]) //amazon install setup check bool amazonInstall = false; string cloud = oam::UnassignedName; - system("aws --version > /tmp/amazon.log 2>&1"); - - ifstream in("/tmp/amazon.log"); - - in.seekg(0, std::ios::end); - int size = in.tellg(); - if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not found")) + + if (!multi_server_quick_install) { - // not running on amazon with ec2-api-tools - if (amazon_quick_install) - { - cout << "ERROR: Amazon Quick Installer was specified, bu the AMazon CLI API packages isnt installed, exiting" << endl; - exit(1); - } + system("aws --version > /tmp/amazon.log 2>&1"); - amazonInstall = false; - } - else - { - if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not installed")) + ifstream in("/tmp/amazon.log"); + + in.seekg(0, std::ios::end); + int size = in.tellg(); + if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not found")) { // not running on amazon with ec2-api-tools if (amazon_quick_install) @@ -1278,9 +1268,23 @@ int main(int argc, char *argv[]) amazonInstall = false; } else - amazonInstall = true; - } + { + if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not installed")) + { + // not running on amazon with ec2-api-tools + if (amazon_quick_install) + { + cout << "ERROR: Amazon Quick Installer was specified, bu the AMazon CLI API packages isnt installed, exiting" << endl; + exit(1); + } + amazonInstall = false; + } + else + amazonInstall = true; + } + } + try { cloud = sysConfig->getConfig(InstallSection, "Cloud"); } @@ -3090,7 +3094,9 @@ int main(int argc, char *argv[]) //check if dbrm data resides in older directory path and inform user if it does dbrmDirCheck(); - if ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM && pmNumber == 1) { + if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || + ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) + { //run the mysql / mysqld setup scripts cout << endl << "===== 
Running the MariaDB ColumnStore MariaDB Server setup scripts =====" << endl << endl; @@ -3098,7 +3104,57 @@ int main(int argc, char *argv[]) // call the mysql setup scripts mysqlSetup(); - sleep(5); + sleep(3); + } + + if ( IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM || + pmNumber > 1 ) + { + if ( password.empty() ) + { + cout << endl; + cout << "Next step is to enter the password to access the other Servers." << endl; + cout << "This is either your password or you can default to using a ssh key" << endl; + cout << "If using a password, the password needs to be the same on all Servers." << endl << endl; + + if ( noPrompting ) { + cout << "Enter password, hit 'enter' to default to using a ssh key, or 'exit' > " << endl; + password = "ssh"; + } + else + { + while(true) + { + char *pass1, *pass2; + + pass1=getpass("Enter password, hit 'enter' to default to using a ssh key, or 'exit' > "); + if ( strcmp(pass1, "") == 0 ) { + password = "ssh"; + break; + } + + string p1 = pass1; + if ( p1 == "exit") + exit(0); + + pass2=getpass("Confirm password > "); + string p2 = pass2; + if ( p1 == p2 ) { + password = p2; + break; + } + else + cout << "Password mismatch, please re-enter" << endl; + } + + //add single quote for special characters + if ( password != "ssh" ) + { + password = "'" + password + "'"; + } + + } + } } int thread_id = 0; @@ -3116,7 +3172,7 @@ int main(int argc, char *argv[]) //skip interface with remote servers and perform install if ( !nonDistribute ) { - // + // // perform remote install of other servers in the system // cout << endl << "===== System Installation =====" << endl << endl; @@ -3173,67 +3229,8 @@ int main(int argc, char *argv[]) if( !pkgCheck(columnstorePackage) ) exit(1); - if ( password.empty() ) - { - cout << endl; - cout << "Next step is to enter the password to access the other Servers." 
<< endl; - cout << "This is either your password or you can default to using a ssh key" << endl; - cout << "If using a password, the password needs to be the same on all Servers." << endl << endl; - } - - while(true) - { - char *pass1, *pass2; - - if ( noPrompting ) { - cout << "Enter password, hit 'enter' to default to using a ssh key, or 'exit' > " << endl; - if ( password.empty() ) - password = "ssh"; - break; - } - - //check for command line option password - if ( !password.empty() ) - break; - - pass1=getpass("Enter password, hit 'enter' to default to using a ssh key, or 'exit' > "); - if ( strcmp(pass1, "") == 0 ) { - password = "ssh"; - break; - } - - if ( pass1 == "exit") - exit(0); - - string p1 = pass1; - pass2=getpass("Confirm password > "); - string p2 = pass2; - if ( p1 == p2 ) { - password = p2; - break; - } - else - cout << "Password mismatch, please re-enter" << endl; - } - - //add single quote for special characters - if ( password != "ssh" ) - { - password = "'" + password + "'"; - } - checkSystemMySQLPort(mysqlPort, sysConfig, USER, password, childmodulelist, IserverTypeInstall, pmwithum); - if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) - { - cout << endl << "===== Running the MariaDB ColumnStore MariaDB ColumnStore setup scripts =====" << endl << endl; - - // call the mysql setup scripts - mysqlSetup(); - sleep(5); - } - string AmazonInstall = "0"; if ( amazonInstall ) AmazonInstall = "1"; @@ -3411,19 +3408,7 @@ int main(int argc, char *argv[]) cout << " DONE" << endl; } } - else - { - if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) - { - cout << endl << "===== Running the MariaDB ColumnStore MariaDB ColumnStore setup scripts =====" << endl << endl; - - // call the mysql setup scripts - mysqlSetup(); - sleep(5); - } - } - + //configure data redundancy if 
(DataRedundancy) { @@ -3641,9 +3626,6 @@ int main(int argc, char *argv[]) } //set mysql replication, if wasn't setup before on system -// if ( ( mysqlRep && pmwithum ) || -// ( mysqlRep && (umNumber > 1) ) || -// ( mysqlRep && (pmNumber > 1) && (IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM) ) ) if ( mysqlRep ) { cout << endl << "Run MariaDB ColumnStore Replication Setup.. "; @@ -3665,7 +3647,10 @@ int main(int argc, char *argv[]) cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". /etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; @@ -3682,7 +3667,10 @@ int main(int argc, char *argv[]) cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". 
/etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 0a054f9c3..89f9a145c 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -575,10 +575,12 @@ void processMSG(messageqcpp::IOSocket* cfIos) if ( count > 0 ) { + string module = oam::UnassignedName; for (int i = 0; i < count; i++) { msg >> value; devicenetworkconfig.DeviceName = value; + module = value; msg >> value; devicenetworkconfig.UserTempDeviceName = value; msg >> value; @@ -606,11 +608,24 @@ void processMSG(messageqcpp::IOSocket* cfIos) } if( status == API_SUCCESS) { + processManager.setSystemState(oam::BUSY_INIT); + + //set query system state not ready + processManager.setQuerySystemState(false); + + //set recycle process + processManager.recycleProcess(target, true); + //distribute config file processManager.distributeConfigFile("system"); + processManager.setSystemState(oam::ACTIVE); + + //set query system state ready + processManager.setQuerySystemState(true); + //call dbrm control - oam.dbrmctl("halt"); +/* oam.dbrmctl("halt"); log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); oam.dbrmctl("reload"); @@ -618,13 +633,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - -// processManager.restartProcessType("ExeMgr"); - - //setup MySQL Replication for started modules -// log.writeLog(__LINE__, "Setup MySQL Replication for module being started", LOG_TYPE_DEBUG); -// processManager.setMySQLReplication(startdevicenetworklist); - } +*/ } } else { @@ -829,8 +838,10 @@ void processMSG(messageqcpp::IOSocket* cfIos) if (opState == oam::MAN_OFFLINE || opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED || opState == oam::AUTO_OFFLINE) { - 
oam.dbrmctl("halt"); - log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); + processManager.setSystemState(oam::BUSY_INIT); + + //set query system state not ready + processManager.setQuerySystemState(false); status = processManager.disableModule(moduleName, true); log.writeLog(__LINE__, "Disable Module Completed on " + moduleName, LOG_TYPE_INFO); @@ -839,14 +850,11 @@ void processMSG(messageqcpp::IOSocket* cfIos) //check for SIMPLEX Processes on mate might need to be started processManager.checkSimplexModule(moduleName); + + processManager.setSystemState(oam::ACTIVE); - //call dbrm control -// oam.dbrmctl("reload"); -// log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); + //set query system state ready + processManager.setQuerySystemState(true); } else { @@ -910,7 +918,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - //stopModules being removed with the REMOVE option, which will stop process + // do stopmodule then enable for( ; listPT != devicenetworklist.end() ; listPT++) { string moduleName = (*listPT).DeviceName; @@ -933,6 +941,9 @@ void processMSG(messageqcpp::IOSocket* cfIos) } if (opState == oam::MAN_DISABLED) { + processManager.stopModule(moduleName, graceful, manualFlag); + log.writeLog(__LINE__, "stop Module Completed on " + moduleName, LOG_TYPE_INFO); + status = processManager.enableModule(moduleName, oam::MAN_OFFLINE); log.writeLog(__LINE__, "Enable Module Completed on " + moduleName, LOG_TYPE_INFO); } @@ -1246,6 +1257,9 @@ void processMSG(messageqcpp::IOSocket* cfIos) log.writeLog(__LINE__, "STOPSYSTEM: ACK back to sender"); } + //set query system state ready + processManager.setQuerySystemState(true); + startsystemthreadStop = false; break; @@ -2758,9 +2772,6 @@ void processMSG(messageqcpp::IOSocket* cfIos) log.writeLog(__LINE__, "MSG RECEIVED: 
Process Restarted on " + moduleName + "/" + processName); //set query system states not ready - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(false); - processManager.setQuerySystemState(false); processManager.setSystemState(oam::BUSY_INIT); @@ -2841,12 +2852,14 @@ void processMSG(messageqcpp::IOSocket* cfIos) break; sleep(1); } - dbrm.setSystemQueryReady(true); + processManager.setQuerySystemState(true); + } // if a DDLProc was restarted, reinit DMLProc if( processName == "DDLProc") { processManager.reinitProcessType("DMLProc"); + processManager.setQuerySystemState(true); } //only run on auto process restart @@ -2893,9 +2906,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) } } - //enable query stats - dbrm.setSystemQueryReady(true); - + //set query system states ready processManager.setQuerySystemState(true); processManager.setSystemState(oam::ACTIVE); @@ -3386,7 +3397,7 @@ int ProcessManager::disableModule(string target, bool manualFlag) /****************************************************************************************** * @brief recycleProcess * -* purpose: recyle process, generally after some disable module is run +* purpose: recyle process, done after disable/enable module * ******************************************************************************************/ void ProcessManager::recycleProcess(string module, bool enableModule) @@ -3410,48 +3421,65 @@ void ProcessManager::recycleProcess(string module, bool enableModule) //recycle DBRM processes in all cases restartProcessType("DBRMControllerNode"); restartProcessType("DBRMWorkerNode"); + sleep(5); restartProcessType("DMLProc"); return; } //recycle DBRM processes in all cases - restartProcessType("DBRMControllerNode", module); - restartProcessType("DBRMWorkerNode"); +// restartProcessType("DBRMControllerNode", module); +// restartProcessType("DBRMWorkerNode"); - - // only recycle dmlproc, if down/up module is non-parent UM - if ( ( moduleType == "um" ) && - ( PrimaryUMModuleName != module) ) + // 
only recycle ddl/dmlproc, if down/up module is non-parent UM +/* if ( ( moduleType == "um" ) && + if ( PrimaryUMModuleName != module) { + restartProcessType("DDLProc",module); restartProcessType("DMLProc",module); return; } - - if( PrimaryUMModuleName == module) - { - stopProcessType("DDLProc"); - stopProcessType("DMLProc"); - } +*/ +// if( PrimaryUMModuleName == module) +// { +// stopProcessType("DDLProc"); +// stopProcessType("DMLProc"); +// } + + stopProcessType("WriteEngineServer"); stopProcessType("ExeMgr"); + + stopProcessType("PrimProc"); - restartProcessType("PrimProc"); - sleep(1); + stopProcessType("DBRMControllerNode"); + stopProcessType("DBRMWorkerNode"); + + stopProcessType("DDLProc"); + stopProcessType("DMLProc"); - restartProcessType("mysqld"); + stopProcessType("mysqld"); - restartProcessType("WriteEngineServer"); - sleep(1); +// restartProcessType("mysqld"); + + startProcessType("DBRMControllerNode"); + startProcessType("DBRMWorkerNode"); + + startProcessType("PrimProc"); + sleep(5); + + startProcessType("WriteEngineServer"); + sleep(3); startProcessType("ExeMgr"); - sleep(1); startProcessType("DDLProc"); sleep(1); startProcessType("DMLProc"); + startProcessType("mysqld"); + return; } @@ -3500,8 +3528,8 @@ int ProcessManager::enableModule(string target, int state, bool failover) setStandbyModule(newStandbyModule); //set recycle process - if (!failover) - recycleProcess(target); +// if (!failover) +// recycleProcess(target); log.writeLog(__LINE__, "enableModule request for " + target + " completed", LOG_TYPE_DEBUG); @@ -3774,6 +3802,7 @@ void ProcessManager::setSystemState(uint16_t state) Oam oam; ALARMManager aManager; Configuration config; + ProcessManager processManager(config, log); log.writeLog(__LINE__, "Set System State = " + oamState[state], LOG_TYPE_DEBUG); @@ -3794,6 +3823,9 @@ void ProcessManager::setSystemState(uint16_t state) // Process Alarms string system = "System"; if( state == oam::ACTIVE ) { + //set query system states ready + 
processManager.setQuerySystemState(true); + //clear alarms if set aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_AUTO, CLEAR); aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_MANUAL, CLEAR); @@ -6244,7 +6276,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ string IPAddr = sysConfig->getConfig(msgPort, "IPAddr"); if ( IPAddr == oam::UnassignedIpAddr ) { - log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); return oam::API_SUCCESS; } @@ -6253,7 +6285,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ string cmd = cmdLine + IPAddr + cmdOption; if ( system(cmd.c_str()) != 0) { //ping failure - log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); return oam::API_SUCCESS; } } @@ -6490,12 +6522,22 @@ void ProcessManager::setQuerySystemState(bool set) try { dbrm.setSystemQueryReady(set); - log.writeLog(__LINE__, "setQuerySystemState successful", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setSystemQueryReady successful", LOG_TYPE_DEBUG); + + try { + dbrm.setSystemReady(set); + log.writeLog(__LINE__, "setSystemReady successful", LOG_TYPE_DEBUG); + } + catch(...) + { + log.writeLog(__LINE__, "setSystemReady failed", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setSystemReady failed", LOG_TYPE_ERROR); + } } catch(...) 
{ - log.writeLog(__LINE__, "setQuerySystemState failed", LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "setQuerySystemState failed", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "setSystemQueryReady failed", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setSystemQueryReady failed", LOG_TYPE_ERROR); } } @@ -6993,7 +7035,7 @@ void startSystemThread(oam::DeviceNetworkList Devicenetworklist) } //set query system state not ready - processManager.setQuerySystemState(true); + processManager.setQuerySystemState(false); // Bug 4554: Wait until DMLProc is finished with rollback if (status == oam::API_SUCCESS) @@ -7062,6 +7104,9 @@ void startSystemThread(oam::DeviceNetworkList Devicenetworklist) processManager.setSystemState(rtn); } + //set query system state ready + processManager.setQuerySystemState(true); + // exit thread log.writeLog(__LINE__, "startSystemThread Exit", LOG_TYPE_DEBUG); startsystemthreadStatus = status; From 8b0507b9872ce3946006e3de90448a39606b3fda Mon Sep 17 00:00:00 2001 From: David Hill Date: Wed, 5 Sep 2018 14:53:13 -0500 Subject: [PATCH 09/32] MCOL-1523 --- oamapps/mcsadmin/mcsadmin.cpp | 11 +-- oamapps/postConfigure/postConfigure.cpp | 2 +- procmgr/main.cpp | 10 +-- procmgr/processmanager.cpp | 113 ++++++++---------------- procmon/main.cpp | 22 ++--- 5 files changed, 52 insertions(+), 106 deletions(-) diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp index 6090058d4..befcb68fa 100644 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ b/oamapps/mcsadmin/mcsadmin.cpp @@ -7036,17 +7036,8 @@ int processCommand(string* arguments) if (systemstatus.SystemOpState == oam::ACTIVE ) { try { -// cout << endl << " Starting Modules" << endl; -// oam.startModule(devicenetworklist, ackTemp); - - //reload DBRM with new configuration, needs to be done here after startModule -// cmd = startup::StartUp::installDir() + "/bin/dbrmctl reload > /dev/null 2>&1"; -// system(cmd.c_str()); -// sleep(15); - -// cout << " Successful start of Modules " << endl; - cout << endl 
<< " Restarting System "; + gracefulTemp = oam::FORCEFUL; int returnStatus = oam.restartSystem(gracefulTemp, ackTemp); switch (returnStatus) { diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index e69677039..fe7a3b337 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -3172,7 +3172,7 @@ int main(int argc, char *argv[]) //skip interface with remote servers and perform install if ( !nonDistribute ) { - // + // // perform remote install of other servers in the system // cout << endl << "===== System Installation =====" << endl << endl; diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 2747fda16..5ef5113f1 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1489,7 +1489,7 @@ void pingDeviceThread() if (moduleInfoList[moduleName] >= ModuleHeartbeatCount || opState == oam::DOWN || opState == oam::AUTO_DISABLED) { - log.writeLog(__LINE__, "Module alive, bring it back online: " + moduleName, LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "*** Module alive, bring it back online: " + moduleName, LOG_TYPE_DEBUG); string PrimaryUMModuleName = config.moduleName(); try { @@ -1927,7 +1927,7 @@ void pingDeviceThread() { //Log failure, issue alarm, set moduleOpState Configuration config; - log.writeLog(__LINE__, "module is down: " + moduleName, LOG_TYPE_CRITICAL); + log.writeLog(__LINE__, "*** module is down: " + moduleName, LOG_TYPE_CRITICAL); //set query system state not ready BRM::DBRM dbrm; @@ -2013,9 +2013,6 @@ void pingDeviceThread() // resume the dbrm oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - - //set recycle process - processManager.recycleProcess(moduleName); } // return values = 'ip address' for running or rebooting, stopped or terminated @@ -2234,9 +2231,6 @@ void pingDeviceThread() oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - //set recycle process - 
processManager.recycleProcess(moduleName); - //enable query stats dbrm.setSystemQueryReady(true); diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 89f9a145c..daacdbf10 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -619,21 +619,11 @@ void processMSG(messageqcpp::IOSocket* cfIos) //distribute config file processManager.distributeConfigFile("system"); - processManager.setSystemState(oam::ACTIVE); - //set query system state ready processManager.setQuerySystemState(true); - //call dbrm control -/* oam.dbrmctl("halt"); - log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); - - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); -*/ } + processManager.setSystemState(oam::ACTIVE); + } } else { @@ -846,15 +836,10 @@ void processMSG(messageqcpp::IOSocket* cfIos) status = processManager.disableModule(moduleName, true); log.writeLog(__LINE__, "Disable Module Completed on " + moduleName, LOG_TYPE_INFO); - processManager.recycleProcess(moduleName); - - //check for SIMPLEX Processes on mate might need to be started - processManager.checkSimplexModule(moduleName); - - processManager.setSystemState(oam::ACTIVE); - //set query system state ready processManager.setQuerySystemState(true); + + processManager.setSystemState(oam::ACTIVE); } else { @@ -1611,6 +1596,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) status = retStatus; } } + //now stop local module processManager.stopModule(config.moduleName(), graceful, manualFlag ); @@ -1627,7 +1613,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) oam::DeviceNetworkList devicenetworklist; pthread_t startsystemthread; - pthread_create (&startsystemthread, NULL, (void*(*)(void*)) &startSystemThread, &devicenetworklist); + status = pthread_create (&startsystemthread, NULL, (void*(*)(void*)) &startSystemThread, &devicenetworklist); if ( status 
!= 0 ) { log.writeLog(__LINE__, "STARTMODULE: pthread_create failed, return status = " + oam.itoa(status)); @@ -1636,20 +1622,19 @@ void processMSG(messageqcpp::IOSocket* cfIos) if (status == 0 && ackIndicator) { - // BUG 4554 We don't need the join because calpont console is now looking for "Active" - // We need to return the ack right away to let console know we got the message. -// pthread_join(startsystemthread, NULL); -// status = startsystemthreadStatus; + pthread_join(startsystemthread, NULL); + status = startsystemthreadStatus; } - - // setup MySQL Replication after switchover command -/* if (graceful == FORCEFUL) + + // setup MySQL Replication after FORCE restart command + if ( (status == API_SUCCESS) && + (graceful == oam::FORCEFUL) ) { - log.writeLog(__LINE__, "Setup MySQL Replication for restartSystem FORCE, used by switch-parent command", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "Setup MySQL Replication for restartSystem FORCE", LOG_TYPE_DEBUG); oam::DeviceNetworkList devicenetworklist; processManager.setMySQLReplication(devicenetworklist); } -*/ + log.writeLog(__LINE__, "RESTARTSYSTEM: Start System Request Completed", LOG_TYPE_INFO); } @@ -3277,6 +3262,7 @@ int ProcessManager::shutdownModule(string target, ByteStream::byte actionIndicat int ProcessManager::disableModule(string target, bool manualFlag) { Oam oam; + ProcessManager processManager(config, log); ModuleConfig moduleconfig; log.writeLog(__LINE__, "disableModule request for " + target, LOG_TYPE_DEBUG); @@ -3386,6 +3372,11 @@ int ProcessManager::disableModule(string target, bool manualFlag) if ( updateWorkerNodeconfig() != API_SUCCESS ) return API_FAILURE; + processManager.recycleProcess(target); + + //check for SIMPLEX Processes on mate might need to be started + processManager.checkSimplexModule(target); + //distribute config file distributeConfigFile("system"); @@ -3414,37 +3405,6 @@ void ProcessManager::recycleProcess(string module, bool enableModule) 
oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); } catch(...) {} - - // restart DBRM Process and DMLProc and return if enable module is being done - if (enableModule) - { - //recycle DBRM processes in all cases - restartProcessType("DBRMControllerNode"); - restartProcessType("DBRMWorkerNode"); - sleep(5); - - restartProcessType("DMLProc"); - return; - } - - //recycle DBRM processes in all cases -// restartProcessType("DBRMControllerNode", module); -// restartProcessType("DBRMWorkerNode"); - - // only recycle ddl/dmlproc, if down/up module is non-parent UM -/* if ( ( moduleType == "um" ) && - if ( PrimaryUMModuleName != module) - { - restartProcessType("DDLProc",module); - restartProcessType("DMLProc",module); - return; - } -*/ -// if( PrimaryUMModuleName == module) -// { -// stopProcessType("DDLProc"); -// stopProcessType("DMLProc"); -// } stopProcessType("WriteEngineServer"); @@ -3526,10 +3486,6 @@ int ProcessManager::enableModule(string target, int state, bool failover) if ( newStandbyModule == target) setStandbyModule(newStandbyModule); - - //set recycle process -// if (!failover) -// recycleProcess(target); log.writeLog(__LINE__, "enableModule request for " + target + " completed", LOG_TYPE_DEBUG); @@ -6518,15 +6474,15 @@ void ProcessManager::setQuerySystemState(bool set) Oam oam; BRM::DBRM dbrm; - log.writeLog(__LINE__, "setQuerySystemState = " + oam.itoa(set), LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setQuerySystemState called = " + oam.itoa(set), LOG_TYPE_DEBUG); try { dbrm.setSystemQueryReady(set); - log.writeLog(__LINE__, "setSystemQueryReady successful", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setSystemQueryReady = " + oam.itoa(set), LOG_TYPE_DEBUG); try { dbrm.setSystemReady(set); - log.writeLog(__LINE__, "setSystemReady successful", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setSystemReady = " + oam.itoa(set), LOG_TYPE_DEBUG); } catch(...) 
{ @@ -7089,23 +7045,28 @@ void startSystemThread(oam::DeviceNetworkList Devicenetworklist) } if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) { - rtn = oam::ACTIVE; + rtn = oam::ACTIVE; break; } if (DMLprocessstatus.ProcessOpState == oam::FAILED) { - rtn = oam::FAILED; + rtn = oam::FAILED; + status = oam::API_FAILURE; break; } - // wait some more - sleep(2); - } - processManager.setSystemState(rtn); + // wait some more + sleep(2); + } + + if ( rtn == oam::ACTIVE ) + //DMLProc reached ACTIVE: set query system state ready + processManager.setQuerySystemState(true); + + processManager.setSystemState(rtn); } - - //set query system state ready - processManager.setQuerySystemState(true); + else + processManager.setSystemState(oam::FAILED); // exit thread log.writeLog(__LINE__, "startSystemThread Exit", LOG_TYPE_DEBUG); diff --git a/procmon/main.cpp b/procmon/main.cpp index d6edd4ac7..a30de1fa1 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -2451,17 +2451,17 @@ void processStatusMSG(messageqcpp::IOSocket* cfIos) } //if DMLProc set to ACTIVE, set system state to ACTIVE if in an INIT state - if ( processName == "DMLProc" && state == oam::ACTIVE ) - { - if ( fShmSystemStatus[0].OpState == oam::BUSY_INIT || - fShmSystemStatus[0].OpState == oam::MAN_INIT || - fShmSystemStatus[0].OpState == oam::AUTO_INIT ) - { - fShmSystemStatus[0].OpState = state; - memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG); - } - } +// if ( processName == "DMLProc" && state == oam::ACTIVE ) +// { +// if ( fShmSystemStatus[0].OpState == oam::BUSY_INIT || +// fShmSystemStatus[0].OpState == oam::MAN_INIT || +// fShmSystemStatus[0].OpState == oam::AUTO_INIT ) +// { +// fShmSystemStatus[0].OpState = state; +// memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); +// log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System 
State = " + oamState[state], LOG_TYPE_DEBUG); +// } +// } } break; From 14d3a34c2893d4d66d62802382f33f0d39f87195 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 7 Sep 2018 11:43:54 +0100 Subject: [PATCH 10/32] MCOL-1694 & MCOL-1505 Improved exception handling This patch catches exceptions in DDLProc, DMLProc and ExeMgr which could potentially happen during startup, logging them instead of silently ignoring them (or crashing in ExeMgr). --- ddlproc/ddlproc.cpp | 33 ++++++++++++++++++++++++++++-- dmlproc/dmlproc.cpp | 44 ++++++++++++++++++++++++++++++++++++++++ dmlproc/dmlprocessor.cpp | 20 ++++++++++++++++++ exemgr/main.cpp | 30 +++++++++++++++++++++++++-- 4 files changed, 123 insertions(+), 4 deletions(-) diff --git a/ddlproc/ddlproc.cpp b/ddlproc/ddlproc.cpp index 45bc6a48d..d74295cfa 100644 --- a/ddlproc/ddlproc.cpp +++ b/ddlproc/ddlproc.cpp @@ -135,8 +135,30 @@ int main(int argc, char* argv[]) { oam.processInitComplete("DDLProc", ACTIVE); } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + LoggingID logid(23, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DDLProc init caught exception: "); + args1.add(ex.what()); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; + } catch (...) { + cerr << "Caught unknown exception in init!" 
<< endl; + LoggingID logid(23, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DDLProc init caught unknown exception"); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; } } @@ -147,21 +169,28 @@ int main(int argc, char* argv[]) catch (std::exception& ex) { cerr << ex.what() << endl; + LoggingID logid(23, 0, 0); Message::Args args; Message message(8); args.add("DDLProc failed on: "); args.add(ex.what()); message.format( args ); - + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, message, logid); + return 1; } catch (...) { cerr << "Caught unknown exception!" << endl; + LoggingID logid(23, 0, 0); Message::Args args; Message message(8); args.add("DDLProc failed on: "); - args.add("receiving DDLPackage"); + args.add("receiving DDLPackage (unknown exception)"); message.format( args ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, message, logid); + return 1; } return 0; } diff --git a/dmlproc/dmlproc.cpp b/dmlproc/dmlproc.cpp index df17fbed6..106977824 100644 --- a/dmlproc/dmlproc.cpp +++ b/dmlproc/dmlproc.cpp @@ -494,8 +494,30 @@ int main(int argc, char* argv[]) // At first we set to BUSY_INIT oam.processInitComplete("DMLProc", oam::BUSY_INIT); } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + LoggingID logid(21, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DMLProc init caught exception: "); + args1.add(ex.what()); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; + } catch (...) { + cerr << "Caught unknown exception in init!" 
<< endl; + LoggingID logid(21, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DMLProc init caught unknown exception"); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; } //@Bug 1627 @@ -584,8 +606,30 @@ int main(int argc, char* argv[]) { oam.processInitComplete("DMLProc", ACTIVE); } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + LoggingID logid(21, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DMLProc init caught exception: "); + args1.add(ex.what()); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; + } catch (...) { + cerr << "Caught unknown exception in init!" << endl; + LoggingID logid(21, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DMLProc init caught unknown exception"); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; } Dec = DistributedEngineComm::instance(rm); diff --git a/dmlproc/dmlprocessor.cpp b/dmlproc/dmlprocessor.cpp index 2205d1712..3b3a5cffc 100644 --- a/dmlproc/dmlprocessor.cpp +++ b/dmlproc/dmlprocessor.cpp @@ -1155,8 +1155,28 @@ void DMLServer::start() } cancelThread.join(); } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + logging::LoggingID lid(21); + Message::Args args; + Message message(8); + args.add("DMLProc init caught exception: "); + args.add(ex.what()); + message.format(args); + logging::Logger logger(lid.fSubsysID); + logger.logMessage(logging::LOG_TYPE_CRITICAL, message, lid); + } catch (...) { + cerr << "Caught unknown exception!" 
<< endl; + logging::LoggingID lid(21); + Message::Args args; + Message message(8); + args.add("DMLProc init caught unknown exception"); + message.format(args); + logging::Logger logger(lid.fSubsysID); + logger.logMessage(logging::LOG_TYPE_CRITICAL, message, lid); } } diff --git a/exemgr/main.cpp b/exemgr/main.cpp index 92f949f57..716c2bf54 100644 --- a/exemgr/main.cpp +++ b/exemgr/main.cpp @@ -1300,8 +1300,34 @@ void cleanTempDir() assert(tmpPrefix != "/"); /* This is quite scary as ExeMgr usually runs as root */ - boost::filesystem::remove_all(tmpPrefix); - boost::filesystem::create_directories(tmpPrefix); + try + { + boost::filesystem::remove_all(tmpPrefix); + boost::filesystem::create_directories(tmpPrefix); + } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + LoggingID logid(16, 0, 0); + Message::Args args; + Message message(8); + args.add("Execption whilst cleaning tmpdir: "); + args.add(ex.what()); + message.format( args ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_WARNING, message, logid); + } + catch (...) 
+ { + cerr << "Caught unknown exception during tmpdir cleanup" << endl; + LoggingID logid(16, 0, 0); + Message::Args args; + Message message(8); + args.add("Unknown execption whilst cleaning tmpdir"); + message.format( args ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_WARNING, message, logid); + } } From c50f5fa05dc5e0c9d5fa8093ebc44e9194cc7cbf Mon Sep 17 00:00:00 2001 From: David Hill Date: Mon, 10 Sep 2018 13:03:35 -0500 Subject: [PATCH 11/32] bump version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 8a397bd3a..04362fcf8 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ COLUMNSTORE_VERSION_MAJOR=1 COLUMNSTORE_VERSION_MINOR=1 -COLUMNSTORE_VERSION_PATCH=6 +COLUMNSTORE_VERSION_PATCH=7 COLUMNSTORE_VERSION_RELEASE=1 From 52cbb623171d49e36585409661dd7740777c6992 Mon Sep 17 00:00:00 2001 From: David Hall Date: Mon, 10 Sep 2018 13:53:56 -0500 Subject: [PATCH 12/32] MCOL-1698 Fix code for DISTINCT in UDAnF --- dbcon/mysql/install_calpont_mysql.sh | 5 +- utils/udfsdk/CMakeLists.txt | 2 +- utils/windowfunction/wf_udaf.cpp | 558 +++++++++++++++----- utils/windowfunction/wf_udaf.h | 9 +- utils/windowfunction/windowfunctiontype.cpp | 2 +- 5 files changed, 444 insertions(+), 132 deletions(-) diff --git a/dbcon/mysql/install_calpont_mysql.sh b/dbcon/mysql/install_calpont_mysql.sh index 259e2d182..d9c1290fb 100755 --- a/dbcon/mysql/install_calpont_mysql.sh +++ b/dbcon/mysql/install_calpont_mysql.sh @@ -84,7 +84,10 @@ CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.so'; CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemready RETURNS INTEGER soname 'libcalmysql.so'; CREATE FUNCTION mcssystemreadonly RETURNS INTEGER soname 'libcalmysql.so'; -CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libudf_mysql.so'; +CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libregr_mysql.so'; +CREATE AGGREGATE FUNCTION regr_avgy RETURNS 
REAL soname 'libregr_mysql.so'; +CREATE AGGREGATE FUNCTION regr_count RETURNS INTEGER soname 'libregr_mysql.so'; +CREATE AGGREGATE FUNCTION distinct_count RETURNS INTEGER soname 'libudf_mysql.so'; CREATE DATABASE IF NOT EXISTS infinidb_vtable; CREATE DATABASE IF NOT EXISTS infinidb_querystats; diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index c4d7fa574..417ccb7ed 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp median.cpp avg_mode.cpp avgx.cpp) +set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp avg_mode.cpp avgx.cpp distinct_count.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index 79ed61b52..6255f2da9 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -53,69 +53,11 @@ using namespace joblist; namespace windowfunction { -template -boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) +boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context) { boost::shared_ptr func; - switch (ct) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DECIMAL: - { - func.reset(new WF_udaf(id, name, context)); - break; - } - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - case CalpontSystemCatalog::UDECIMAL: - { - func.reset(new WF_udaf(id, name, context)); - break; - } - - case CalpontSystemCatalog::DOUBLE: - case 
CalpontSystemCatalog::UDOUBLE: - { - func.reset(new WF_udaf(id, name, context)); - break; - } - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - { - func.reset(new WF_udaf(id, name, context)); - break; - } - - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::VARBINARY: - case CalpontSystemCatalog::TEXT: - case CalpontSystemCatalog::BLOB: - { - func.reset(new WF_udaf(id, name, context)); - break; - } - - default: - { - string errStr = name + "(" + colType2String[ct] + ")"; - errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); - cerr << errStr << endl; - throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); - - break; - } - } + func.reset(new WF_udaf(id, name, context)); // Get the UDAnF function object WF_udaf* wfUDAF = (WF_udaf*)func.get(); @@ -125,30 +67,26 @@ boost::shared_ptr WF_udaf::makeFunction(int id, const str return func; } -template -WF_udaf::WF_udaf(WF_udaf& rhs) : fUDAFContext(rhs.getContext()), +WF_udaf::WF_udaf(WF_udaf& rhs) : fUDAFContext(rhs.getContext()), bInterrupted(rhs.getInterrupted()), fDistinct(rhs.getDistinct()) { getContext().setInterrupted(getInterruptedPtr()); } -template -WindowFunctionType* WF_udaf::clone() const +WindowFunctionType* WF_udaf::clone() const { return new WF_udaf(*const_cast(this)); } -template -void WF_udaf::resetData() +void WF_udaf::resetData() { getContext().getFunction()->reset(&getContext()); fDistinctSet.clear(); WindowFunctionType::resetData(); } -template -void WF_udaf::parseParms(const std::vector& parms) +void WF_udaf::parseParms(const std::vector& parms) { bRespectNulls = true; // The last parms: respect null | ignore null @@ -156,10 +94,13 @@ void WF_udaf::parseParms(const std::vector& parms) idbassert(cc != NULL); bool isNull = false; // dummy, harded coded bRespectNulls = (cc->getIntVal(fRow, isNull) > 0); + if (getContext().getRunFlag(mcsv1sdk::UDAF_DISTINCT)) + { + setDistinct(); + } } -template -bool 
WF_udaf::dropValues(int64_t b, int64_t e) +bool WF_udaf::dropValues(int64_t b, int64_t e) { if (!bHasDropValue) { @@ -168,6 +109,7 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) } mcsv1sdk::mcsv1_UDAF::ReturnCode rc; + bool isNull = false; // Turn on the Analytic flag so the function is aware it is being called // as a Window Function. @@ -175,14 +117,26 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) // Put the parameter metadata (type, scale, precision) into valsIn mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()]; + ConstantColumn* cc = NULL; for (uint32_t i = 0; i < getContext().getParameterCount(); ++i) { - uint64_t colIn = fFieldIndex[i + 1]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; - datum.dataType = fRow.getColType(colIn); - datum.scale = fRow.getScale(colIn); - datum.precision = fRow.getPrecision(colIn); + cc = static_cast(fConstantParms[i].get()); + + if (cc) + { + datum.dataType = cc->resultType().colDataType; + datum.scale = cc->resultType().scale; + datum.precision = cc->resultType().precision; + } + else + { + uint64_t colIn = fFieldIndex[i + 1]; + datum.dataType = fRow.getColType(colIn); + datum.scale = fRow.getScale(colIn); + datum.precision = fRow.getPrecision(colIn); + } } for (int64_t i = b; i < e; i++) @@ -190,52 +144,325 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) if (i % 1000 == 0 && fStep->cancelled()) break; - bool bHasNull = false; fRow.setData(getPointer(fRowData->at(i))); - // Turn on NULL flags + + // NULL flags uint32_t flags[getContext().getParameterCount()]; + bool bSkipIt = false; for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { + cc = static_cast(fConstantParms[k].get()); uint64_t colIn = fFieldIndex[k + 1]; mcsv1sdk::ColumnDatum& datum = valsIn[k]; - flags[k] = 0; - if (fRow.isNullValue(colIn) == true) + // Turn on Null flags or skip based on respect nulls + flags[k] = 0; + if ((!cc && fRow.isNullValue(colIn) == true) + || (cc && cc->type() == ConstantColumn::NULLDATA)) { if 
(!bRespectNulls) { - bHasNull = true; + bSkipIt = true; break; } flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - T valIn; - getValue(colIn, valIn, &datum.dataType); - - // Check for distinct, if turned on. - // Currently, distinct only works for param 1 - if (k == 0) + if (!bSkipIt && !(flags[k] & mcsv1sdk::PARAM_IS_NULL)) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + switch (datum.dataType) { - continue; + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: + { + int64_t valIn; + + if (cc) + { + valIn = cc->getIntVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if (fDistinct) + { + DistinctMap::iterator distinct; + distinct = fDistinctMap.find(valIn); + if (distinct != fDistinctMap.end()) + { + // This is a duplicate: decrement the count + --(*distinct).second; + if ((*distinct).second > 0) // still more of these + { + bSkipIt = true; + continue; + } + else + { + fDistinctMap.erase(distinct); + } + } + } + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DECIMAL: + case CalpontSystemCatalog::UDECIMAL: + { + int64_t valIn; + + if (cc) + { + valIn = cc->getDecimalVal(fRow, isNull).value; + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if (fDistinct) + { + DistinctMap::iterator distinct; + distinct = fDistinctMap.find(valIn); + if (distinct != fDistinctMap.end()) + { + // This is a duplicate: decrement the count + --(*distinct).second; + if ((*distinct).second > 0) // still more of these + { + bSkipIt = true; + continue; + } + else + { + fDistinctMap.erase(distinct); + } + } + } + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + { + uint64_t valIn; + + if (cc) + { + valIn = cc->getUintVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if (fDistinct) + { + DistinctMap::iterator distinct; + distinct = fDistinctMap.find(valIn); + if (distinct != fDistinctMap.end()) + { + // This is a duplicate: decrement the count + --(*distinct).second; + if ((*distinct).second > 0) // still more of these + { + bSkipIt = true; + continue; + } + else + { + fDistinctMap.erase(distinct); + } + } + } + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + double valIn; + + if (cc) + { + valIn = cc->getDoubleVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if (fDistinct) + { + DistinctMap::iterator distinct; + distinct = fDistinctMap.find(valIn); + if (distinct != fDistinctMap.end()) + { + // This is a duplicate: decrement the count + --(*distinct).second; + if ((*distinct).second > 0) // still more of these + { + bSkipIt = true; + continue; + } + else + { + fDistinctMap.erase(distinct); + } + } + } + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + float valIn; + + if (cc) + { + valIn = cc->getFloatVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. + if (k == 0) + { + if (fDistinct) + { + DistinctMap::iterator distinct; + distinct = fDistinctMap.find(valIn); + if (distinct != fDistinctMap.end()) + { + // This is a duplicate: decrement the count + --(*distinct).second; + if ((*distinct).second > 0) // still more of these + { + bSkipIt = true; + continue; + } + else + { + fDistinctMap.erase(distinct); + } + } + } + } + + datum.columnData = valIn; + break; + } + + case CalpontSystemCatalog::CHAR: + case CalpontSystemCatalog::VARCHAR: + case CalpontSystemCatalog::VARBINARY: + case CalpontSystemCatalog::TEXT: + case CalpontSystemCatalog::BLOB: + { + string valIn; + + if (cc) + { + valIn = cc->getStrVal(fRow, isNull); + } + else + { + getValue(colIn, valIn); + } + + // Check for distinct, if turned on. + // Currently, distinct only works on the first parameter. 
+ if (k == 0) + { + if (fDistinct) + { + DistinctMap::iterator distinct; + distinct = fDistinctMap.find(valIn); + if (distinct != fDistinctMap.end()) + { + // This is a duplicate: decrement the count + --(*distinct).second; + if ((*distinct).second > 0) // still more of these + { + bSkipIt = true; + continue; + } + else + { + fDistinctMap.erase(distinct); + } + } + } + } + + datum.columnData = valIn; + break; + } + + default: + { + string errStr = "(" + colType2String[(int)datum.dataType] + ")"; + errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); + cerr << errStr << endl; + throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); + + break; + } } - - if (fDistinct) - fDistinctSet.insert(valIn); } - - datum.columnData = valIn; } - if (bHasNull) + if (bSkipIt) { continue; } + getContext().setDataFlags(flags); + rc = getContext().getFunction()->dropValue(&getContext(), valsIn); if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED) @@ -257,8 +484,7 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) } // Sets the value from valOut into column colOut, performing any conversions. -template -void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, +void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, int64_t b, int64_t e, int64_t c) { static const static_any::any& charTypeId = (char)1; @@ -279,15 +505,6 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, CDT colDataType = fRow.getColType(colOut); - if (valOut.empty()) - { - // If valOut is empty, we return NULL - T* pv = NULL; - setValue(colDataType, b, e, c, pv); - fPrev = c; - return; - } - // This may seem a bit convoluted. 
Users shouldn't return a type // that they didn't set in mcsv1_UDAF::init(), but this // handles whatever return type is given and casts @@ -405,7 +622,16 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, case execplan::CalpontSystemCatalog::BIGINT: case execplan::CalpontSystemCatalog::DECIMAL: case execplan::CalpontSystemCatalog::UDECIMAL: - setValue(colDataType, b, e, c, &intOut); + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + if (valOut.empty()) + { + setValue(colDataType, b, e, c, (int64_t*)NULL); + } + else + { + setValue(colDataType, b, e, c, &intOut); + } break; case execplan::CalpontSystemCatalog::UTINYINT: @@ -416,17 +642,38 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, case execplan::CalpontSystemCatalog::DATE: case execplan::CalpontSystemCatalog::DATETIME: case execplan::CalpontSystemCatalog::TIME: - setValue(colDataType, b, e, c, &uintOut); + if (valOut.empty()) + { + setValue(colDataType, b, e, c, (uint64_t*)NULL); + } + else + { + setValue(colDataType, b, e, c, &uintOut); + } break; case execplan::CalpontSystemCatalog::FLOAT: case execplan::CalpontSystemCatalog::UFLOAT: - setValue(colDataType, b, e, c, &floatOut); + if (valOut.empty()) + { + setValue(colDataType, b, e, c, (float*)NULL); + } + else + { + setValue(colDataType, b, e, c, &floatOut); + } break; case execplan::CalpontSystemCatalog::DOUBLE: case execplan::CalpontSystemCatalog::UDOUBLE: - setValue(colDataType, b, e, c, &doubleOut); + if (valOut.empty()) + { + setValue(colDataType, b, e, c, (double*)NULL); + } + else + { + setValue(colDataType, b, e, c, &doubleOut); + } break; case execplan::CalpontSystemCatalog::CHAR: @@ -435,7 +682,14 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, case execplan::CalpontSystemCatalog::VARBINARY: case execplan::CalpontSystemCatalog::CLOB: case execplan::CalpontSystemCatalog::BLOB: - setValue(colDataType, b, e, c, &strOut); + if 
(valOut.empty()) + { + setValue(colDataType, b, e, c, (string*)NULL); + } + else + { + setValue(colDataType, b, e, c, &strOut); + } break; default: @@ -449,8 +703,7 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, } } -template -void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) +void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { mcsv1sdk::mcsv1_UDAF::ReturnCode rc; uint64_t colOut = fFieldIndex[0]; @@ -499,7 +752,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) else getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW); - bool bHasNull = false; + bool bSkipIt = false; for (int64_t i = b; i <= e; i++) { @@ -510,7 +763,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // NULL flags uint32_t flags[getContext().getParameterCount()]; - bHasNull = false; + bSkipIt = false; for (uint32_t k = 0; k < getContext().getParameterCount(); ++k) { @@ -526,14 +779,14 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { if (!bRespectNulls) { - bHasNull = true; + bSkipIt = true; break; } flags[k] |= mcsv1sdk::PARAM_IS_NULL; } - if (!bHasNull && !(flags[k] & mcsv1sdk::PARAM_IS_NULL)) + if (!bSkipIt && !(flags[k] & mcsv1sdk::PARAM_IS_NULL)) { switch (datum.dataType) { @@ -542,6 +795,8 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) case CalpontSystemCatalog::MEDINT: case CalpontSystemCatalog::INT: case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: { int64_t valIn; @@ -560,7 +815,21 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) { - continue; + // MCOL-1698 + std::pair val = make_pair(valIn, 1); + // Unordered_map will not insert a duplicate key (valIn). + // If it doesn't insert, the original pair will be returned + // in distinct.first and distinct.second will be a bool -- + // true if newly inserted, false if a duplicate. 
+ std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) + { + // This is a duplicate: increment the count + ++(*distinct.first).second; + bSkipIt = true; + continue; + } } if (fDistinct) @@ -591,7 +860,15 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) { - continue; + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) + { + ++(*distinct.first).second; + bSkipIt = true; + continue; + } } if (fDistinct) @@ -625,7 +902,15 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) { - continue; + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) + { + ++(*distinct.first).second; + bSkipIt = true; + continue; + } } if (fDistinct) @@ -656,7 +941,15 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) { - continue; + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) + { + ++(*distinct.first).second; + bSkipIt = true; + continue; + } } if (fDistinct) @@ -687,7 +980,15 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) { - continue; + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) + { + ++(*distinct.first).second; + bSkipIt = true; + continue; + } } if (fDistinct) @@ -721,7 +1022,15 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) { if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) { - continue; + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = 
fDistinctMap.insert(val); + if (distinct.second == false) + { + ++(*distinct.first).second; + bSkipIt = true; + continue; + } } if (fDistinct) @@ -734,7 +1043,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) default: { - string errStr = "(" + colType2String[i] + ")"; + string errStr = "(" + colType2String[(int)datum.dataType] + ")"; errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr); cerr << errStr << endl; throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE); @@ -746,7 +1055,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) } // Skip if any value is NULL and respect nulls is off. - if (bHasNull) + if (bSkipIt) { continue; } @@ -780,8 +1089,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) fPrev = c; } -template -boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context); +boost::shared_ptr WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context); } //namespace // vim:ts=4 sw=4: diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index ef2ca5853..872865f46 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -45,7 +45,7 @@ public: class DistinctEqual { public: - inline bool operator()(const static_any::any& lhs, static_any::any& rhs) const + inline bool operator()(const static_any::any lhs, static_any::any rhs) const { return lhs == rhs; } @@ -53,7 +53,6 @@ public: // A class to control the execution of User Define Analytic Functions (UDAnF) // as defined by a specialization of mcsv1sdk::mcsv1_UDAF -template class WF_udaf : public WindowFunctionType { public: @@ -92,8 +91,10 @@ protected: bool fDistinct; bool bRespectNulls; // respect null | ignore null bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue. 
- // To hold distinct values - std::tr1::unordered_set fDistinctSet; + // To hold distinct values and their counts + typedef std::tr1::unordered_map DistinctMap; + DistinctMap fDistinctMap; + static_any::any fValOut; // The return value public: diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index dfceb6364..efede3ef5 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -208,7 +208,7 @@ WindowFunctionType::makeWindowFunction(const string& name, int ct, WindowFunctio break; case WF__UDAF: - af = WF_udaf::makeFunction(functionId, name, ct, wc->getUDAFContext()); + af = WF_udaf::makeFunction(functionId, name, ct, wc->getUDAFContext()); break; case WF__REGR_SLOPE: From 70cec8f484a67d1d22eb3e5c332fe0a1f9b18478 Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 11 Sep 2018 12:02:05 -0500 Subject: [PATCH 13/32] MCOL-1698 get DISTINCT working for UDAnF --- utils/udfsdk/mcsv1_udaf.cpp | 2 +- utils/udfsdk/mcsv1_udaf.h | 1 + utils/windowfunction/wf_udaf.cpp | 158 ++++++++++++------------------- utils/windowfunction/wf_udaf.h | 9 +- 4 files changed, 70 insertions(+), 100 deletions(-) diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp index 2a93cfad3..9d513ced2 100644 --- a/utils/udfsdk/mcsv1_udaf.cpp +++ b/utils/udfsdk/mcsv1_udaf.cpp @@ -58,7 +58,7 @@ UDAF_MAP& UDAFMap::getMap() // the function names passed to the interface is always in lower case. 
fm["allnull"] = new allnull(); fm["ssq"] = new ssq(); - fm["median"] = new median(); +// fm["median"] = new median(); fm["avg_mode"] = new avg_mode(); fm["avgx"] = new avgx(); diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h index 28db6808b..073b5164a 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -189,6 +189,7 @@ static uint64_t UDAF_WINDOWFRAME_REQUIRED __attribute__ ((unused)) = 1 << 4; // static uint64_t UDAF_WINDOWFRAME_ALLOWED __attribute__ ((unused)) = 1 << 5; // If used as UDAnF, a WINDOW FRAME is optional static uint64_t UDAF_MAYBE_NULL __attribute__ ((unused)) = 1 << 6; // If UDA(n)F might return NULL. static uint64_t UDAF_IGNORE_NULLS __attribute__ ((unused)) = 1 << 7; // If UDA(n)F wants NULL rows suppressed. +static uint64_t UDAF_DISTINCT __attribute__ ((unused)) = 1 << 8; // Force UDA(n)F to be distinct on first param. // Flags set by the framework to define the context of the call. // User code shouldn't use these directly diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index 6255f2da9..b4951edea 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -82,7 +82,7 @@ WindowFunctionType* WF_udaf::clone() const void WF_udaf::resetData() { getContext().getFunction()->reset(&getContext()); - fDistinctSet.clear(); + fDistinctMap.clear(); WindowFunctionType::resetData(); } @@ -179,8 +179,6 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) case CalpontSystemCatalog::MEDINT: case CalpontSystemCatalog::INT: case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: { int64_t valIn; @@ -270,6 +268,9 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) case CalpontSystemCatalog::UMEDINT: case CalpontSystemCatalog::UINT: case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::TIME: + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: { uint64_t valIn; @@ -622,8 +623,6 @@ void 
WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut, case execplan::CalpontSystemCatalog::BIGINT: case execplan::CalpontSystemCatalog::DECIMAL: case execplan::CalpontSystemCatalog::UDECIMAL: - case execplan::CalpontSystemCatalog::DATE: - case execplan::CalpontSystemCatalog::DATETIME: if (valOut.empty()) { setValue(colDataType, b, e, c, (int64_t*)NULL); @@ -795,8 +794,6 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) case CalpontSystemCatalog::MEDINT: case CalpontSystemCatalog::INT: case CalpontSystemCatalog::BIGINT: - case CalpontSystemCatalog::DATE: - case CalpontSystemCatalog::DATETIME: { int64_t valIn; @@ -811,29 +808,23 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. - if (k == 0) + if (k == 0 && fDistinct) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + // MCOL-1698 + std::pair val = make_pair(valIn, 1); + // Unordered_map will not insert a duplicate key (valIn). + // If it doesn't insert, the original pair will be returned + // in distinct.first and distinct.second will be a bool -- + // true if newly inserted, false if a duplicate. + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) { - // MCOL-1698 - std::pair val = make_pair(valIn, 1); - // Unordered_map will not insert a duplicate key (valIn). - // If it doesn't insert, the original pair will be returned - // in distinct.first and distinct.second will be a bool -- - // true if newly inserted, false if a duplicate. 
- std::pair distinct; - distinct = fDistinctMap.insert(val); - if (distinct.second == false) - { - // This is a duplicate: increment the count - ++(*distinct.first).second; - bSkipIt = true; - continue; - } + // This is a duplicate: increment the count + ++(*distinct.first).second; + bSkipIt = true; + continue; } - - if (fDistinct) - fDistinctSet.insert(valIn); } datum.columnData = valIn; @@ -856,23 +847,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. - if (k == 0) + if (k == 0 && fDistinct) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) { - std::pair val = make_pair(valIn, 1); - std::pair distinct; - distinct = fDistinctMap.insert(val); - if (distinct.second == false) - { - ++(*distinct.first).second; - bSkipIt = true; - continue; - } + ++(*distinct.first).second; + bSkipIt = true; + continue; } - - if (fDistinct) - fDistinctSet.insert(valIn); } datum.columnData = valIn; @@ -884,6 +869,9 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) case CalpontSystemCatalog::UMEDINT: case CalpontSystemCatalog::UINT: case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::TIME: + case CalpontSystemCatalog::DATE: + case CalpontSystemCatalog::DATETIME: { uint64_t valIn; @@ -898,23 +886,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. 
- if (k == 0) + if (k == 0 && fDistinct) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) { - std::pair val = make_pair(valIn, 1); - std::pair distinct; - distinct = fDistinctMap.insert(val); - if (distinct.second == false) - { - ++(*distinct.first).second; - bSkipIt = true; - continue; - } + ++(*distinct.first).second; + bSkipIt = true; + continue; } - - if (fDistinct) - fDistinctSet.insert(valIn); } datum.columnData = valIn; @@ -937,23 +919,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. - if (k == 0) + if (k == 0 && fDistinct) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) { - std::pair val = make_pair(valIn, 1); - std::pair distinct; - distinct = fDistinctMap.insert(val); - if (distinct.second == false) - { - ++(*distinct.first).second; - bSkipIt = true; - continue; - } + ++(*distinct.first).second; + bSkipIt = true; + continue; } - - if (fDistinct) - fDistinctSet.insert(valIn); } datum.columnData = valIn; @@ -976,23 +952,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. 
- if (k == 0) + if (k == 0 && fDistinct) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) { - std::pair val = make_pair(valIn, 1); - std::pair distinct; - distinct = fDistinctMap.insert(val); - if (distinct.second == false) - { - ++(*distinct.first).second; - bSkipIt = true; - continue; - } + ++(*distinct.first).second; + bSkipIt = true; + continue; } - - if (fDistinct) - fDistinctSet.insert(valIn); } datum.columnData = valIn; @@ -1018,23 +988,17 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Check for distinct, if turned on. // Currently, distinct only works on the first parameter. - if (k == 0) + if (k == 0 && fDistinct) { - if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end())) + std::pair val = make_pair(valIn, 1); + std::pair distinct; + distinct = fDistinctMap.insert(val); + if (distinct.second == false) { - std::pair val = make_pair(valIn, 1); - std::pair distinct; - distinct = fDistinctMap.insert(val); - if (distinct.second == false) - { - ++(*distinct.first).second; - bSkipIt = true; - continue; - } + ++(*distinct.first).second; + bSkipIt = true; + continue; } - - if (fDistinct) - fDistinctSet.insert(valIn); } datum.columnData = valIn; diff --git a/utils/windowfunction/wf_udaf.h b/utils/windowfunction/wf_udaf.h index 872865f46..38515285f 100644 --- a/utils/windowfunction/wf_udaf.h +++ b/utils/windowfunction/wf_udaf.h @@ -22,9 +22,9 @@ #define UTILS_WF_UDAF_H #ifndef _MSC_VER -#include +#include #else -#include +#include #endif #include "windowfunctiontype.h" #include "mcsv1_udaf.h" @@ -83,6 +83,11 @@ public: return fDistinct; } + void setDistinct(bool d = true) + { + fDistinct = d; + } + protected: void SetUDAFValue(static_any::any& valOut, int64_t colOut, int64_t b, int64_t e, int64_t c); From f7a2b50b21d3bbe1fd2cb6bec850b92f0ed9c5c7 Mon Sep 17 00:00:00 2001 From: David 
Hill Date: Tue, 11 Sep 2018 15:47:25 -0500 Subject: [PATCH 14/32] MCOL-1699 - fix iss with adddbroot amazon --- oam/oamcpp/liboamcpp.cpp | 37 +++++++++++++++++++++---------------- procmon/main.cpp | 2 +- procmon/processmonitor.cpp | 4 +++- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index 7483ca239..baa8dbe94 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -7713,7 +7713,7 @@ namespace oam // run script to get Instance status and IP Address string cmd = InstallDir + "/bin/MCSInstanceCmds.sh getInstance > /tmp/getInstanceInfo_" + name; int status = system(cmd.c_str()); - if (WEXITSTATUS(status) != 0 ) + if (WEXITSTATUS(status) == 1 ) return "failed"; // get Instance Name @@ -7744,7 +7744,7 @@ namespace oam // run script to get Instance status and IP Address string cmd = InstallDir + "/bin/MCSInstanceCmds.sh getType > /tmp/getInstanceType_" + name; int status = system(cmd.c_str()); - if (WEXITSTATUS(status) != 0 ) + if (WEXITSTATUS(status) == 1 ) return "failed"; // get Instance Name @@ -7775,7 +7775,7 @@ namespace oam // run script to get Instance Subnet string cmd = InstallDir + "/bin/MCSInstanceCmds.sh getSubnet > /tmp/getInstanceSubnet_" + name; int status = system(cmd.c_str()); - if (WEXITSTATUS(status) != 0 ) + if (WEXITSTATUS(status) == 1 ) return "failed"; // get Instance Name @@ -7807,7 +7807,7 @@ namespace oam // run script to get Instance status and IP Address string cmd = InstallDir + "/bin/MCSInstanceCmds.sh launchInstance " + IPAddress + " " + type + " " + group + " > /tmp/getInstance_" + name; int status = system(cmd.c_str()); - if (WEXITSTATUS(status) != 0 ) + if (WEXITSTATUS(status) == 1 ) return "failed"; if (checkLogStatus("/tmp/getInstance", "Required") ) @@ -7883,7 +7883,7 @@ namespace oam // run script to get Instance status and IP Address string cmd = InstallDir + "/bin/MCSInstanceCmds.sh startInstance " + instanceName + " > /tmp/startEC2Instance_" + 
instanceName; int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) return false; return true; @@ -7902,7 +7902,7 @@ namespace oam // run script to get Instance status and IP Address string cmd = InstallDir + "/bin/MCSInstanceCmds.sh assignElasticIP " + instanceName + " " + IpAddress + " > /tmp/assignElasticIP_" + instanceName; int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) exceptionControl("assignElasticIP", oam::API_FAILURE); return true; @@ -7921,7 +7921,7 @@ namespace oam // run script to get Instance status and IP Address string cmd = InstallDir + "/bin/MCSInstanceCmds.sh deassignElasticIP " + IpAddress + " > /tmp/deassignElasticIP_" + IpAddress; int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) exceptionControl("deassignElasticIP", oam::API_FAILURE); return true; @@ -7940,8 +7940,9 @@ namespace oam // run script to get Volume Status string cmd = InstallDir + "/bin/MCSVolumeCmds.sh describe " + volumeName + " > /tmp/getVolumeStatus_" + volumeName; int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ){ return "failed"; + } // get status string status; @@ -7971,7 +7972,7 @@ namespace oam // run script to get Volume Status string cmd = InstallDir + "/bin/MCSVolumeCmds.sh create " + size + " " + name + " > /tmp/createVolumeStatus_" + name; int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) return "failed"; // get status @@ -8016,11 +8017,15 @@ namespace oam string cmd = InstallDir + "/bin/MCSVolumeCmds.sh attach " + volumeName + " " + instanceName + " " + deviceName + " > /tmp/attachVolumeStatus_" + volumeName; ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) == 0 ) + if (WEXITSTATUS(ret) == 1 ) + { + //failing to attach, dettach and retry + writeLog("attachEC2Volume: Attach failed, call detach:" + volumeName + " " + instanceName + " " + deviceName, LOG_TYPE_ERROR ); 
+ + detachEC2Volume(volumeName); + } + else return true; - - //failing to attach, dettach and retry - detachEC2Volume(volumeName); } if (ret == 0 ) @@ -8042,7 +8047,7 @@ namespace oam // run script to attach Volume string cmd = InstallDir + "/bin/MCSVolumeCmds.sh detach " + volumeName + " > /tmp/detachVolumeStatus_" + volumeName; int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) return false; return true; @@ -8061,7 +8066,7 @@ namespace oam // run script to delete Volume string cmd = InstallDir + "/bin/MCSVolumeCmds.sh delete " + volumeName + " > /tmp/deleteVolumeStatus_" + volumeName; int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) return false; return true; @@ -8080,7 +8085,7 @@ namespace oam // run script to create a tag string cmd = InstallDir + "/bin/MCSVolumeCmds.sh createTag " + resourceName + " " + tagName + " " + tagValue + " > /tmp/createTagStatus_" + resourceName; int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) != 0 ) + if (WEXITSTATUS(ret) == 1 ) return false; return true; diff --git a/procmon/main.cpp b/procmon/main.cpp index ad05a4f95..d877bdcfe 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -210,7 +210,7 @@ int main(int argc, char **argv) } catch(...) 
{} - if ( cloud == "amazon-ec2" ) { + if ( cloud == "amazon-ec2" || cloud == "amazon-vpc" ) { if(!aMonitor.amazonIPCheck()) { log.writeLog(__LINE__, "ERROR: amazonIPCheck failed, exiting", LOG_TYPE_CRITICAL); sleep(2); diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index efa01c449..bf471e62c 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -5491,7 +5491,6 @@ bool ProcessMonitor::amazonIPCheck() log.writeLog(__LINE__, "Assign Elastic IP Address failed : '" + moduleName + "' / '" + ELIPaddress, LOG_TYPE_ERROR); break; } - break; } @@ -5653,8 +5652,11 @@ bool ProcessMonitor::amazonVolumeCheck(int dbrootID) {} if (oam.attachEC2Volume(volumeName, deviceName, instanceName)) { + log.writeLog(__LINE__, "amazonVolumeCheck function , volume to attached: " + volumeName, LOG_TYPE_DEBUG); + string cmd = "mount " + startup::StartUp::installDir() + "/data" + oam.itoa(dbrootID) + " > /dev/null"; system(cmd.c_str()); + log.writeLog(__LINE__, "amazonVolumeCheck function , volume to mounted: " + volumeName, LOG_TYPE_DEBUG); return true; } else From 21f108896d02d74a2e6ca9945d40f67e816dd176 Mon Sep 17 00:00:00 2001 From: David Hill Date: Wed, 12 Sep 2018 08:36:13 -0500 Subject: [PATCH 15/32] MCOL-1523 - fix compile issue --- procmon/main.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/procmon/main.cpp b/procmon/main.cpp index a3b8d52ef..ac7c761cc 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -2443,10 +2443,6 @@ void processStatusMSG(messageqcpp::IOSocket* cfIos) memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG); } - - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(true); - } } break; From 8ec02bfce5efd124a950e4be706b037df81f147f Mon Sep 17 00:00:00 2001 From: David Hill Date: Wed, 12 Sep 2018 14:31:23 -0500 Subject: [PATCH 16/32] MCOL-1423 --- procmgr/processmanager.cpp | 7 
++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 97e42533f..42d1c167b 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -2846,10 +2846,13 @@ void processMSG(messageqcpp::IOSocket* cfIos) } - // if a DDLProc was restarted, reinit DMLProc + // if a DDLProc was restarted, restart DMLProc if( processName == "DDLProc") { processManager.reinitProcessType("DMLProc"); + //set query system states ready processManager.setQuerySystemState(true); + + processManager.setSystemState(oam::ACTIVE); } //only run on auto process restart @@ -2900,6 +2903,8 @@ void processMSG(messageqcpp::IOSocket* cfIos) processManager.setQuerySystemState(true); processManager.setSystemState(oam::ACTIVE); + + log.writeLog(__LINE__, "MSG RECEIVED: Process Restarted Completed"); } break; From 3fac7b1e199272afea4ab6b6e3d791c466127207 Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 13 Sep 2018 14:12:10 -0500 Subject: [PATCH 17/32] MCOL-521 more multi-param work with constant NULL --- utils/rowgroup/rowaggregation.cpp | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index f9db8b266..d08781c07 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -2015,18 +2015,9 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, for (uint32_t i = 0; i < paramCount; ++i) { - // If UDAF_IGNORE_NULLS is on, bIsNull gets set the first time - // we find a null. 
We still need to eat the rest of the parameters - // to sync updateEntry - if (bIsNull) - { - ++funcColsIdx; - continue; - } - SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; mcsv1sdk::ColumnDatum& datum = valsIn[i]; - // Turn on NULL flags + // Turn on NULL flags based on the data dataFlags[i] = 0; // If this particular parameter is a constant, then we need @@ -2043,9 +2034,11 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, { if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS)) { - bIsNull = true; - ++funcColsIdx; - continue; + // When Ignore nulls, if there are multiple parameters and any + // one of them is NULL, we ignore the entry. We need to increment + // funcColsIdx the number of extra parameters. + funcColsIdx += paramCount - i - 1; + return; } dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL; From 3ac9d93597f2fba2d8eebcad2ab434460c58c486 Mon Sep 17 00:00:00 2001 From: David Hill Date: Sat, 15 Sep 2018 14:28:46 -0500 Subject: [PATCH 18/32] MCOL-1523 - addiiotnal fixes --- oamapps/mcsadmin/mcsadmin.cpp | 2 +- procmgr/processmanager.cpp | 10 +++++++--- procmon/main.cpp | 8 ++++++-- procmon/processmonitor.cpp | 7 ------- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp index befcb68fa..29d53b35f 100644 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ b/oamapps/mcsadmin/mcsadmin.cpp @@ -7036,7 +7036,7 @@ int processCommand(string* arguments) if (systemstatus.SystemOpState == oam::ACTIVE ) { try { - cout << endl << " Restarting System "; + cout << endl << " Restarting System " << endl; gracefulTemp = oam::FORCEFUL; int returnStatus = oam.restartSystem(gracefulTemp, ackTemp); switch (returnStatus) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 42d1c167b..8ce936c97 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -1637,7 +1637,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) { log.writeLog(__LINE__, 
"Setup MySQL Replication for restartSystem FORCE", LOG_TYPE_DEBUG); oam::DeviceNetworkList devicenetworklist; - processManager.setMySQLReplication(devicenetworklist); + processManager.setMySQLReplication(devicenetworklist, oam::UnassignedName, true); } log.writeLog(__LINE__, "RESTARTSYSTEM: Start System Request Completed", LOG_TYPE_INFO); @@ -2769,12 +2769,16 @@ void processMSG(messageqcpp::IOSocket* cfIos) processManager.reinitProcessType("cpimport"); //request reinit after Process is active - for ( int i = 0; i < 600 ; i++ ) { + for ( int i = 0; i < 10 ; i++ ) { try { ProcessStatus procstat; oam.getProcessStatus(processName, moduleName, procstat); - if (procstat.ProcessOpState == oam::ACTIVE) { + if (procstat.ProcessOpState == oam::COLD_STANDBY) + break; + + if ( (procstat.ProcessOpState == oam::ACTIVE) || + (procstat.ProcessOpState == oam::STANDBY) ) { // if a PrimProc was restarted, reinit ACTIVE ExeMgr(s) and DDL/DMLProc if( processName == "PrimProc") { diff --git a/procmon/main.cpp b/procmon/main.cpp index 4be046511..63f0140b2 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -1388,7 +1388,7 @@ static void chldHandleThread(MonitorConfig config) catch(...) 
{} - // check if process failover is needed due to process outage + // check if Mdoule failover is needed due to process outage aMonitor.checkModuleFailover((*listPtr).ProcessName); //check the db health @@ -1463,15 +1463,19 @@ static void chldHandleThread(MonitorConfig config) restartStatus = " restart failed with hard failure, don't retry!!"; (*listPtr).processID = 0; - // check if process failover is needed due to process outage + // check if Module failover is needed due to process outage aMonitor.checkModuleFailover((*listPtr).ProcessName); break; } else { if ( (*listPtr).processID != oam::API_MINOR_FAILURE ) + { //restarted successful + //Inform Process Manager that Process restart + aMonitor.processRestarted( (*listPtr).ProcessName, false); break; + } } // restart failed with minor error, sleep and try sleep(5); diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index bf471e62c..2109d8ed1 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -621,9 +621,6 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO else log.writeLog(__LINE__, "START: process already active " + processName); - //Inform Process Manager that Process restart - //processRestarted(processName); - ackMsg << (ByteStream::byte) ACK; ackMsg << (ByteStream::byte) START; ackMsg << (ByteStream::byte) requestStatus; @@ -720,9 +717,6 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO requestStatus = API_FAILURE; } - //Inform Process Manager that Process restart - //processRestarted(processName); - ackMsg << (ByteStream::byte) ACK; ackMsg << (ByteStream::byte) RESTART; ackMsg << (ByteStream::byte) requestStatus; @@ -4650,7 +4644,6 @@ void ProcessMonitor::checkModuleFailover( std::string processName) systemprocessstatus.processstatus[i].ProcessOpState == oam::FAILED ) { // found a AVAILABLE mate, start it log.writeLog(__LINE__, "Change UM Master to module " + 
systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "Disable local UM module " + config.moduleName(), LOG_TYPE_DEBUG); log.writeLog(__LINE__, "Stop local UM module " + config.moduleName(), LOG_TYPE_DEBUG); log.writeLog(__LINE__, "Disable Local will Enable UM module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG); From 24c5e937565f044919b18662309db4a133ed9291 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 21 Sep 2018 09:50:10 +0100 Subject: [PATCH 19/32] MCOL-1737 Add debug logging options for LRU cache This adds options which are user enabled to debug the LRU cache inside ColumnStore. Specifically cache flushing. It adds the following: * PrimProc flush information when SIGUSR2 mode is enabled * cpimport dictionary flush information when -d2 is used * WriteEngineServer DML flush information to STDERR --- primitives/blockcache/filebuffermgr.cpp | 74 ++++++++++++++++++++++- writeengine/bulk/we_tableinfo.cpp | 11 ++++ writeengine/server/we_dmlcommandproc.cpp | 3 + writeengine/shared/we_bulkrollbackmgr.cpp | 3 + 4 files changed, 88 insertions(+), 3 deletions(-) diff --git a/primitives/blockcache/filebuffermgr.cpp b/primitives/blockcache/filebuffermgr.cpp index e6cab58ac..eb5f9a2a9 100644 --- a/primitives/blockcache/filebuffermgr.cpp +++ b/primitives/blockcache/filebuffermgr.cpp @@ -117,7 +117,10 @@ void FileBufferMgr::flushCache() // the block pool should not be freed in the above block to allow us // to continue doing concurrent unprotected-but-"safe" memcpys // from that memory - + if (fReportFrequency) + { + fLog << "Clearing entire cache" << endl; + } fFBPool.clear(); // fFBPool.reserve(fMaxNumBlocks); } @@ -150,6 +153,15 @@ void FileBufferMgr::flushMany(const LbidAtVer* laVptr, uint32_t cnt) BRM::LBID_t lbid; BRM::VER_t ver; filebuffer_uset_iter_t iter; + if (fReportFrequency) + { + fLog << "flushMany " << cnt << " items: "; + for (uint32_t j = 0; j < cnt; j++) + { + fLog << "lbid: " << laVptr[j].LBID 
<< " ver: " << laVptr[j].Ver << ", "; + } + fLog << endl; + } for (uint32_t j = 0; j < cnt; j++) { lbid = static_cast(laVptr->LBID); @@ -157,6 +169,10 @@ void FileBufferMgr::flushMany(const LbidAtVer* laVptr, uint32_t cnt) iter = fbSet.find(HashObject_t(lbid, ver, 0)); if (iter != fbSet.end()) { + if (fReportFrequency) + { + fLog << "flushMany hit, lbid: " << lbid << " index: " << iter->poolIdx << endl; + } //remove it from fbList uint32_t idx = iter->poolIdx; fbList.erase(fFBPool[idx].listLoc()); @@ -179,6 +195,16 @@ void FileBufferMgr::flushManyAllversion(const LBID_t* laVptr, uint32_t cnt) mutex::scoped_lock lk(fWLock); + if (fReportFrequency) + { + fLog << "flushManyAllversion " << cnt << " items: "; + for (uint32_t i = 0; i < cnt; i++) + { + fLog << laVptr[i] << ", "; + } + fLog << endl; + } + if (fCacheSize == 0 || cnt == 0) return; @@ -187,6 +213,10 @@ void FileBufferMgr::flushManyAllversion(const LBID_t* laVptr, uint32_t cnt) for (it = fbSet.begin(); it != fbSet.end();) { if (uniquer.find(it->lbid) != uniquer.end()) { + if (fReportFrequency) + { + fLog << "flushManyAllversion hit: " << it->lbid << " index: " << it->poolIdx << endl; + } const uint32_t idx = it->poolIdx; fbList.erase(fFBPool[idx].listLoc()); fEmptyPoolSlots.push_back(idx); @@ -213,6 +243,16 @@ void FileBufferMgr::flushOIDs(const uint32_t *oids, uint32_t count) pair itList; filebuffer_uset_t::iterator it; + if (fReportFrequency) + { + fLog << "flushOIDs " << count << " items: "; + for (uint32_t i = 0; i < count; i++) + { + fLog << oids[i] << ", "; + } + fLog << endl; + } + // If there are more than this # of extents to drop, the whole cache will be cleared const uint32_t clearThreshold = 50000; @@ -269,6 +309,22 @@ void FileBufferMgr::flushPartition(const vector &oids, const set::iterator sit; + fLog << "flushPartition oids: "; + for (uint32_t i = 0; i < count; i++) + { + fLog << oids[i] << ", "; + } + fLog << "flushPartition partitions: "; + for (sit = partitions.begin(); sit != 
partitions.end(); ++sit) + { + fLog << (*sit).toString() << ", "; + } + fLog << endl; + } + if (fCacheSize == 0 || oids.size() == 0 || partitions.size() == 0) return; @@ -496,7 +552,7 @@ int FileBufferMgr::insert(const BRM::LBID_t lbid, const BRM::VER_t ver, const ui if (fReportFrequency && (fBlksLoaded%fReportFrequency)==0) { struct timespec tm; clock_gettime(CLOCK_MONOTONIC, &tm); - fLog + fLog << "insert: " << left << fixed << ((double)(tm.tv_sec+(1.e-9*tm.tv_nsec))) << " " << right << setw(12) << fBlksLoaded << " " << right << setw(12) << fBlksNotUsed << endl; @@ -671,6 +727,11 @@ int FileBufferMgr::bulkInsert(const vector &ops) mutex::scoped_lock lk(fWLock); + if (fReportFrequency) + { + fLog << "bulkInsert: "; + } + for (i = 0; i < ops.size(); i++) { const CacheInsert_t &op = ops[i]; @@ -694,7 +755,10 @@ int FileBufferMgr::bulkInsert(const vector &ops) continue; } - //cout << "FBM: inserting <" << op.lbid << ", " << op.ver << endl; + if (fReportFrequency) + { + fLog << op.lbid << " " << op.ver << ", "; + } fCacheSize++; fBlksLoaded++; FBData_t fbdata = {op.lbid, op.ver, 0}; @@ -712,6 +776,10 @@ int FileBufferMgr::bulkInsert(const vector &ops) #endif ret++; } + if (fReportFrequency) + { + fLog << endl; + } idbassert(fCacheSize <= maxCacheSize()); return ret; diff --git a/writeengine/bulk/we_tableinfo.cpp b/writeengine/bulk/we_tableinfo.cpp index 0e651b55d..922e0a24b 100644 --- a/writeengine/bulk/we_tableinfo.cpp +++ b/writeengine/bulk/we_tableinfo.cpp @@ -709,6 +709,17 @@ int TableInfo::setParseComplete(const int &columnId, #ifdef PROFILE Stats::startParseEvent(WE_STATS_FLUSH_PRIMPROC_BLOCKS); #endif + if (fLog->isDebug(DEBUG_2)) + { + ostringstream oss; + oss << "Dictionary cache flush: "; + for (uint32_t i = 0; i < fDictFlushBlks.size(); i++) + { + oss << fDictFlushBlks[i] << ", "; + } + oss << endl; + fLog->logMsg( oss.str(), MSGLVL_INFO1 ); + } cacheutils::flushPrimProcAllverBlocks(fDictFlushBlks); #ifdef PROFILE 
Stats::stopParseEvent(WE_STATS_FLUSH_PRIMPROC_BLOCKS); diff --git a/writeengine/server/we_dmlcommandproc.cpp b/writeengine/server/we_dmlcommandproc.cpp index 0fda76182..90831afd6 100644 --- a/writeengine/server/we_dmlcommandproc.cpp +++ b/writeengine/server/we_dmlcommandproc.cpp @@ -2036,10 +2036,13 @@ uint8_t WE_DMLCommandProc::commitBatchAutoOn(messageqcpp::ByteStream& bs, std::s { std::set::iterator lbidIter; std::vector dictFlushBlks; + cerr << "API Flushing blocks: "; for (lbidIter = (*mapIter).second.begin(); lbidIter != (*mapIter).second.end(); lbidIter++) { + cerr << *lbidIter << ", "; dictFlushBlks.push_back((*lbidIter)); } + cerr << endl; cacheutils::flushPrimProcAllverBlocks(dictFlushBlks); fWEWrapper.getDictMap().erase(txnID); } diff --git a/writeengine/shared/we_bulkrollbackmgr.cpp b/writeengine/shared/we_bulkrollbackmgr.cpp index 2c41734e0..6f8c986b8 100644 --- a/writeengine/shared/we_bulkrollbackmgr.cpp +++ b/writeengine/shared/we_bulkrollbackmgr.cpp @@ -195,13 +195,16 @@ int BulkRollbackMgr::rollback ( bool keepMetaFile ) // the user but keep going. std::vector allOIDs; std::set::const_iterator iter=fAllColDctOIDs.begin(); + cerr << "Rollback flushing: "; while (iter != fAllColDctOIDs.end()) { + cerr << *iter << ", "; //std::cout << "Flushing OID from PrimProc cache " << *iter << // std::endl; allOIDs.push_back(*iter); ++iter; } + cerr << endl; int cache_rc = cacheutils::flushOIDsFromCache( allOIDs ); if (cache_rc != 0) From d930a1e32222e0c56f19edc2b2c4ed006664ebde Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 25 Sep 2018 16:31:10 -0500 Subject: [PATCH 20/32] MCOL-521 Some more fixes for multi-parm aggregates. 
Add regr slope --- dbcon/joblist/tupleaggregatestep.cpp | 197 +++++++++++++++++++----- dbcon/mysql/ha_window_function.cpp | 3 + dbcon/mysql/install_calpont_mysql.sh | 3 + genii.vpw | 1 + utils/regr/CMakeLists.txt | 26 ++++ utils/regr/regr.vpj | 5 + utils/regr/regr_avgx.cpp | 133 +--------------- utils/regr/regr_avgy.cpp | 132 +--------------- utils/regr/regr_slope.cpp | 197 ++++++++++++++++++++++++ utils/regr/regr_slope.h | 88 +++++++++++ utils/regr/regrmysql.cpp | 222 ++++++++++++++++++++++++++- utils/rowgroup/rowaggregation.h | 2 +- utils/udfsdk/mcsv1_udaf.h | 72 +++++++++ 13 files changed, 776 insertions(+), 305 deletions(-) create mode 100755 utils/regr/CMakeLists.txt create mode 100644 utils/regr/regr_slope.cpp create mode 100644 utils/regr/regr_slope.h diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 97a577f4f..da4f73823 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -76,21 +76,55 @@ namespace struct cmpTuple { - bool operator()(boost::tuple a, - boost::tuple b) + bool operator()(boost::tuple* > a, + boost::tuple* > b) { - if (boost::get<0>(a) < boost::get<0>(b)) + uint32_t keya = boost::get<0>(a); + uint32_t keyb = boost::get<0>(b); + int opa; + int opb; + mcsv1sdk::mcsv1_UDAF* pUDAFa; + mcsv1sdk::mcsv1_UDAF* pUDAFb; + + // If key is less than + if (keya < keyb) return true; - - if (boost::get<0>(a) == boost::get<0>(b)) + if (keya == keyb) { - if (boost::get<1>(a) < boost::get<1>(b)) + // test Op + opa = boost::get<1>(a); + opb = boost::get<1>(b); + if (opa < opb) return true; + if (opa == opb) + { + // look at the UDAF object + pUDAFa = boost::get<2>(a); + pUDAFb = boost::get<2>(b); + if (pUDAFa < pUDAFb) + return true; + if (pUDAFa == pUDAFb) + { + if (pUDAFa == NULL) + return false; + std::vector* paramKeysa = boost::get<3>(a); + std::vector* paramKeysb = boost::get<3>(b); - if (boost::get<1>(a) == boost::get<1>(b)) - return boost::get<2>(a) < boost::get<2>(b); 
+ if (paramKeysa->size() < paramKeysb->size()) + return true; + if (paramKeysa->size() == paramKeysb->size()) + { + if (paramKeysa == NULL) + return false; + for (uint64_t i = 0; i < paramKeysa->size(); ++i) + { + if ((*paramKeysa)[i] < (*paramKeysb)[i]) + return true; + } + } + } + } } - return false; } }; @@ -101,7 +135,7 @@ typedef vector RowBucketVec; // The AGG_MAP type is used to maintain a list of aggregate functions in order to // detect duplicates. Since all UDAF have the same op type (ROWAGG_UDAF), we add in // the function pointer in order to ensure uniqueness. -typedef map, uint64_t, cmpTuple> AGG_MAP; +typedef map* >, uint64_t, cmpTuple> AGG_MAP; inline RowAggFunctionType functionIdMap(int planFuncId) { @@ -796,7 +830,6 @@ const string TupleAggregateStep::toString() const return oss.str(); } - SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo) { SJSTEP spjs; @@ -1301,6 +1334,16 @@ void TupleAggregateStep::prep1PhaseAggregate( if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. + if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < returnedColVec.size() && returnedColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(returnedColVec[k].first); + } + } // Create a RowAggFunctionCol (UDAF subtype) with the context. funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, outIdx)); break; @@ -1502,7 +1545,7 @@ void TupleAggregateStep::prep1PhaseAggregate( } // find if this func is a duplicate - AGG_MAP::iterator iter = aggFuncMap.find(boost::make_tuple(key, aggOp, pUDAFFunc)); + AGG_MAP::iterator iter = aggFuncMap.find(boost::make_tuple(key, aggOp, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL)); if (iter != aggFuncMap.end()) { @@ -1519,7 +1562,7 @@ void TupleAggregateStep::prep1PhaseAggregate( } else { - aggFuncMap.insert(make_pair(boost::make_tuple(key, aggOp, pUDAFFunc), funct->fOutputColumnIndex)); + aggFuncMap.insert(make_pair(boost::make_tuple(key, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), funct->fOutputColumnIndex)); } if (aggOp != ROWAGG_MULTI_PARM) @@ -1740,7 +1783,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(widthProj[colProj]); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[colAgg], 0, pUDAFFunc), colAgg)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[colAgg], 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAgg)); colAgg++; } @@ -1781,7 +1824,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(typeProj[colProj]); widthAgg.push_back(widthProj[colProj]); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[colAgg], 0, pUDAFFunc), colAgg)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[colAgg], 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAgg)); colAgg++; } @@ -1811,7 +1854,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( aggOp, stats, colAgg, colAgg, -1)); functionVec1.push_back(funct); - aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc), colAgg)); + aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAgg)); colAgg++; continue; @@ -1858,6 +1901,16 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. 
+ if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < aggColVec.size() && aggColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(aggColVec[k].first); + } + } // Create a RowAggFunctionCol (UDAF subtype) with the context. funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAgg)); break; @@ -1874,11 +1927,11 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // skip if this is a duplicate - if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc)) != aggFuncMap.end()) + if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)) != aggFuncMap.end()) continue; functionVec1.push_back(funct); - aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc), colAgg)); + aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAgg)); switch (aggOp) { @@ -2103,6 +2156,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // If the param is const if (udafc) { + if (udafcParamIdx > udafc->aggParms().size() - 1) + { + throw QueryDataExcept("prep1PhaseDistinctAggregate: UDAF multi function with too many parms", aggregateFuncErr); + } ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); if (cc) { @@ -2162,7 +2219,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( { SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(i, -1)); groupByNoDist.push_back(groupby); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc), i)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[i], 0, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL), i)); } // locate the return column position in aggregated rowgroup @@ -2186,7 +2243,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { - AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, 0, pUDAFFunc)); + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { @@ -2218,6 +2275,16 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. + if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < returnedColVec.size() && returnedColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(returnedColVec[k].first); + } + } break; } } @@ -2318,7 +2385,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( case ROWAGG_BIT_XOR: default: { - AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { @@ -2349,7 +2416,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // check if a SUM or COUNT covered by AVG if (aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) { - it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc)); + it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { @@ -2534,7 +2601,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( functionVec2.push_back(funct); // find if this func is a duplicate - AGG_MAP::iterator iter = aggDupFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + AGG_MAP::iterator iter = aggDupFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (iter != aggDupFuncMap.end()) { @@ -2551,7 +2618,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } else { - aggDupFuncMap.insert(make_pair(boost::make_tuple(retKey, aggOp, pUDAFFunc), + aggDupFuncMap.insert(make_pair(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), funct->fOutputColumnIndex)); } @@ -3048,7 +3115,7 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc), colAggPm)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAggPm)); colAggPm++; } @@ -3089,7 +3156,7 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc), colAggPm)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAggPm)); colAggPm++; } @@ -3138,6 +3205,16 @@ void TupleAggregateStep::prep2PhasesAggregate( if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. 
+ if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < aggColVec.size() && aggColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(aggColVec[k].first); + } + } // Create a RowAggFunctionCol (UDAF subtype) with the context. funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); break; @@ -3154,11 +3231,11 @@ void TupleAggregateStep::prep2PhasesAggregate( } // skip if this is a duplicate - if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc)) != aggFuncMap.end()) + if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)) != aggFuncMap.end()) continue; functionVecPm.push_back(funct); - aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc), colAggPm)); + aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAggPm)); switch (aggOp) { @@ -3385,6 +3462,10 @@ void TupleAggregateStep::prep2PhasesAggregate( // If the param is const if (udafc) { + if (udafcParamIdx > udafc->aggParms().size() - 1) + { + throw QueryDataExcept("prep2PhasesAggregate: UDAF multi function with too many parms", aggregateFuncErr); + } ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); if (cc) { @@ -3460,6 +3541,16 @@ void TupleAggregateStep::prep2PhasesAggregate( if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. 
+ if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < returnedColVec.size() && returnedColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(returnedColVec[k].first); + } + } break; } } @@ -3469,7 +3560,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } } - AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { @@ -3490,7 +3581,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // check if a SUM or COUNT covered by AVG if (aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) { - it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc)); + it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { @@ -3632,7 +3723,7 @@ void TupleAggregateStep::prep2PhasesAggregate( functionVecUm.push_back(funct); // find if this func is a duplicate - AGG_MAP::iterator iter = aggDupFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + AGG_MAP::iterator iter = aggDupFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (iter != aggDupFuncMap.end()) { @@ -3649,7 +3740,7 @@ void TupleAggregateStep::prep2PhasesAggregate( } else { - aggDupFuncMap.insert(make_pair(boost::make_tuple(retKey, aggOp, pUDAFFunc), + aggDupFuncMap.insert(make_pair(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL), funct->fOutputColumnIndex)); } @@ -3911,7 +4002,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc), colAggPm)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAggPm)); colAggPm++; } @@ -3952,7 +4043,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(typeProj[colProj]); widthAggPm.push_back(width[colProj]); - aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc), colAggPm)); + aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAggPm)); colAggPm++; } @@ -4008,6 +4099,16 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. + if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < aggColVec.size() && aggColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(aggColVec[k].first); + } + } // Create a RowAggFunctionCol (UDAF subtype) with the context. funct.reset(new RowUDAFFunctionCol(udafc->getContext(), colProj, colAggPm)); break; @@ -4024,11 +4125,11 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // skip if this is a duplicate - if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc)) != aggFuncMap.end()) + if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL)) != aggFuncMap.end()) continue; functionVecPm.push_back(funct); - aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc), colAggPm-multiParm)); + aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAggPm-multiParm)); switch (aggOp) { @@ -4253,6 +4354,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // If the param is const if (udafc) { + if (udafcParamIdx > udafc->aggParms().size() - 1) + { + throw QueryDataExcept("prep2PhasesDistinctAggregate: UDAF multi function with too many parms", aggregateFuncErr); + } ConstantColumn* cc = dynamic_cast(udafc->aggParms()[udafcParamIdx].get()); if (cc) { @@ -4400,6 +4505,16 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (udafc) { pUDAFFunc = udafc->getContext().getFunction(); + // Save the multi-parm keys for dup-detection. + if (pUDAFFunc && udafc->getContext().getParamKeys()->size() == 0) + { + for (uint64_t k = i+1; + k < returnedColVec.size() && returnedColVec[k].second == AggregateColumn::MULTI_PARM; + ++k) + { + udafc->getContext().getParamKeys()->push_back(returnedColVec[k].first); + } + } break; } } @@ -4412,7 +4527,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) { - AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, 0, pUDAFFunc)); + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { @@ -4515,7 +4630,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // For non distinct aggregates if (colUm == -1) { - AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { @@ -4536,7 +4651,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // check if a SUM or COUNT covered by AVG if (aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) { - it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc)); + it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (it != aggFuncMap.end()) { @@ -4674,7 +4789,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( functionVecUm.push_back(funct); // find if this func is a duplicate - AGG_MAP::iterator iter = aggDupFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc)); + AGG_MAP::iterator iter = aggDupFuncMap.find(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); if (iter != aggDupFuncMap.end()) { @@ -4691,7 +4806,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } else { - aggDupFuncMap.insert(make_pair(boost::make_tuple(retKey, aggOp, pUDAFFunc), + aggDupFuncMap.insert(make_pair(boost::make_tuple(retKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), funct->fOutputColumnIndex)); } diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index 0c57ce8bc..f4a95bbc3 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -323,6 +323,9 @@ string ConvertFuncName(Item_sum* item) case Item_sum::LAG_FUNC: return "LAG"; break; + default: + // We just don't handle it. 
+ break; }; return ""; diff --git a/dbcon/mysql/install_calpont_mysql.sh b/dbcon/mysql/install_calpont_mysql.sh index d9c1290fb..311e03784 100755 --- a/dbcon/mysql/install_calpont_mysql.sh +++ b/dbcon/mysql/install_calpont_mysql.sh @@ -87,6 +87,9 @@ CREATE FUNCTION mcssystemreadonly RETURNS INTEGER soname 'libcalmysql.so'; CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libregr_mysql.so'; CREATE AGGREGATE FUNCTION regr_avgy RETURNS REAL soname 'libregr_mysql.so'; CREATE AGGREGATE FUNCTION regr_count RETURNS INTEGER soname 'libregr_mysql.so'; +CREATE AGGREGATE FUNCTION regr_slope RETURNS REAL soname 'libregr_mysql.so'; +CREATE AGGREGATE FUNCTION regr_intercept RETURNS REAL soname 'libregr_mysql.so'; + CREATE AGGREGATE FUNCTION distinct_count RETURNS INTEGER soname 'libudf_mysql.so'; CREATE DATABASE IF NOT EXISTS infinidb_vtable; diff --git a/genii.vpw b/genii.vpw index 69e258339..686b01ed7 100644 --- a/genii.vpw +++ b/genii.vpw @@ -44,6 +44,7 @@ + diff --git a/utils/regr/CMakeLists.txt b/utils/regr/CMakeLists.txt new file mode 100755 index 000000000..47db83c63 --- /dev/null +++ b/utils/regr/CMakeLists.txt @@ -0,0 +1,26 @@ + +include_directories( ${ENGINE_COMMON_INCLUDES} + ../../dbcon/mysql ) + +########### next target ############### + +set(regr_LIB_SRCS regr_avgx.cpp regr_avgy.cpp regr_count.cpp regr_slope.cpp regr_intercept) + +add_definitions(-DMYSQL_DYNAMIC_PLUGIN) + +add_library(regr SHARED ${regr_LIB_SRCS} ) + +set_target_properties(regr PROPERTIES VERSION 1.1.0 SOVERSION 1) + +install(TARGETS regr DESTINATION ${ENGINE_LIBDIR} COMPONENT libs) + + + +set(regr_mysql_LIB_SRCS regrmysql.cpp) + +add_library(regr_mysql SHARED ${regr_mysql_LIB_SRCS}) + +set_target_properties(regr_mysql PROPERTIES VERSION 1.0.0 SOVERSION 1) + +install(TARGETS regr_mysql DESTINATION ${ENGINE_LIBDIR} COMPONENT storage-engine) + diff --git a/utils/regr/regr.vpj b/utils/regr/regr.vpj index d99a1d436..a22c54ced 100644 --- a/utils/regr/regr.vpj +++ b/utils/regr/regr.vpj @@ -197,6 
+197,9 @@ + + + + + getUserData()->data; - DATATYPE val = 0.0; - - if (context->isParamNull(0) || context->isParamNull(1)) - { - return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. - } - - if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. Probably redundant - { - return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. - } - - if (valIn_x.compatible(longTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(charTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(scharTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(shortTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(intTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(llTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ucharTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ushortTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(uintTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ulongTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ullTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(floatTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(doubleTypeId)) - { - val = valIn_x.cast(); - } + DATATYPE val = convertAnyTo(valIn_x); // For decimal types, we need to move the decimal point. 
uint32_t scale = valsIn[1].scale; - if (val != 0 && scale > 0) { val /= pow(10.0, (double)scale); @@ -202,76 +137,12 @@ mcsv1_UDAF::ReturnCode regr_avgx::evaluate(mcsv1Context* context, static_any::an mcsv1_UDAF::ReturnCode regr_avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { - static_any::any& valIn_y = valsDropped[0].columnData; static_any::any& valIn_x = valsDropped[1].columnData; struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data; - DATATYPE val = 0.0; - - if (context->isParamNull(0) || context->isParamNull(1)) - { - return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. - } - if (valIn_x.empty() || valIn_y.empty()) - { - return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. - } - - if (valIn_x.compatible(charTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(scharTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(shortTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(intTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(longTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(llTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ucharTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ushortTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(uintTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ulongTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(ullTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(floatTypeId)) - { - val = valIn_x.cast(); - } - else if (valIn_x.compatible(doubleTypeId)) - { - val = valIn_x.cast(); - } + double val = convertAnyTo(valIn_x); // For decimal types, we need to move the decimal point. 
uint32_t scale = valsDropped[1].scale; - if (val != 0 && scale > 0) { val /= pow(10.0, (double)scale); diff --git a/utils/regr/regr_avgy.cpp b/utils/regr/regr_avgy.cpp index 667019a33..69c654acf 100644 --- a/utils/regr/regr_avgy.cpp +++ b/utils/regr/regr_avgy.cpp @@ -85,75 +85,11 @@ mcsv1_UDAF::ReturnCode regr_avgy::reset(mcsv1Context* context) mcsv1_UDAF::ReturnCode regr_avgy::nextValue(mcsv1Context* context, ColumnDatum* valsIn) { static_any::any& valIn_y = valsIn[0].columnData; - static_any::any& valIn_x = valsIn[1].columnData; struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data; - DATATYPE val = 0.0; - - if (context->isParamNull(0) || context->isParamNull(1)) - { - return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. - } - if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. Probably redundant - { - return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. - } - - if (valIn_y.compatible(longTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(charTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(scharTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(shortTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(intTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(llTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(ucharTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(ushortTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(uintTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(ulongTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(ullTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(floatTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(doubleTypeId)) - { - val = valIn_y.cast(); - } + double val = convertAnyTo(valIn_y); // For decimal 
types, we need to move the decimal point. uint32_t scale = valsIn[0].scale; - if (val != 0 && scale > 0) { val /= pow(10.0, (double)scale); @@ -199,75 +135,11 @@ mcsv1_UDAF::ReturnCode regr_avgy::evaluate(mcsv1Context* context, static_any::an mcsv1_UDAF::ReturnCode regr_avgy::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { static_any::any& valIn_y = valsDropped[0].columnData; - static_any::any& valIn_x = valsDropped[1].columnData; struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data; - DATATYPE val = 0.0; - - if (context->isParamNull(0) || context->isParamNull(1)) - { - return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. - } - if (valIn_x.empty() || valIn_y.empty()) - { - return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on. - } - - if (valIn_y.compatible(charTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(scharTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(shortTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(intTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(longTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(llTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(ucharTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(ushortTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(uintTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(ulongTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(ullTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(floatTypeId)) - { - val = valIn_y.cast(); - } - else if (valIn_y.compatible(doubleTypeId)) - { - val = valIn_y.cast(); - } + double val = convertAnyTo(valIn_y); // For decimal types, we need to move the decimal point. 
uint32_t scale = valsDropped[0].scale; - if (val != 0 && scale > 0) { val /= pow(10.0, (double)scale); diff --git a/utils/regr/regr_slope.cpp b/utils/regr/regr_slope.cpp new file mode 100644 index 000000000..c4178c56b --- /dev/null +++ b/utils/regr/regr_slope.cpp @@ -0,0 +1,197 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include +#include "regr_slope.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +class Add_regr_slope_ToUDAFMap +{ +public: + Add_regr_slope_ToUDAFMap() + { + UDAFMap::getMap()["regr_slope"] = new regr_slope(); + } +}; + +static Add_regr_slope_ToUDAFMap addToMap; + +// Use the simple data model +struct regr_slope_data +{ + uint64_t cnt; + double sumx; + double sumx2; // sum of (x squared) + double sumy; + double sumxy; // sum of (x*y) +}; + + +mcsv1_UDAF::ReturnCode regr_slope::init(mcsv1Context* context, + ColumnDatum* colTypes) +{ + if (context->getParameterCount() != 2) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_slope() with other than 2 arguments"); + return mcsv1_UDAF::ERROR; + } + + context->setUserDataSize(sizeof(regr_slope_data)); + context->setResultType(CalpontSystemCatalog::DOUBLE); + context->setColWidth(8); + if 
(colTypes[0].scale) + { + context->setScale(colTypes[0].scale + 8); + context->setPrecision(19); + } + context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); + return mcsv1_UDAF::SUCCESS; + +} + +mcsv1_UDAF::ReturnCode regr_slope::reset(mcsv1Context* context) +{ + struct regr_slope_data* data = (struct regr_slope_data*)context->getUserData()->data; + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + data->sumy = 0.0; + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_slope::nextValue(mcsv1Context* context, ColumnDatum* valsIn) +{ + static_any::any& valIn_y = valsIn[0].columnData; + static_any::any& valIn_x = valsIn[1].columnData; + struct regr_slope_data* data = (struct regr_slope_data*)context->getUserData()->data; + double valx = 0.0; + double valy = 0.0; + + valx = convertAnyTo(valIn_x); + valy = convertAnyTo(valIn_y); + + // For decimal types, we need to move the decimal point. + uint32_t scaley = valsIn[0].scale; + + if (valy != 0 && scaley > 0) + { + valy /= pow(10.0, (double)scaley); + } + + data->sumy += valy; + + // For decimal types, we need to move the decimal point. 
+ uint32_t scalex = valsIn[1].scale; + + if (valx != 0 && scalex > 0) + { + valx /= pow(10.0, (double)scaley); + } + + data->sumx += valx; + data->sumx2 += valx*valx; + + data->sumxy += valx*valy; + ++data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_slope::subEvaluate(mcsv1Context* context, const UserData* userDataIn) +{ + if (!userDataIn) + { + return mcsv1_UDAF::SUCCESS; + } + + struct regr_slope_data* outData = (struct regr_slope_data*)context->getUserData()->data; + struct regr_slope_data* inData = (struct regr_slope_data*)userDataIn->data; + + outData->sumx += inData->sumx; + outData->sumx2 += inData->sumx2; + outData->sumy += inData->sumy; + outData->sumxy += inData->sumxy; + outData->cnt += inData->cnt; + + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_slope::evaluate(mcsv1Context* context, static_any::any& valOut) +{ + struct regr_slope_data* data = (struct regr_slope_data*)context->getUserData()->data; + double N = data->cnt; + if (N > 0) + { + double sumx = data->sumx; + double sumy = data->sumy; + double sumx2 = data->sumx2; + double sumxy = data->sumxy; + double variance = (N * sumx2) - (sumx * sumx); + if (variance != 0) + { + valOut = ((N * sumxy) - (sumx * sumy)) / variance; + } + } + return mcsv1_UDAF::SUCCESS; +} + +mcsv1_UDAF::ReturnCode regr_slope::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) +{ + static_any::any& valIn_y = valsDropped[0].columnData; + static_any::any& valIn_x = valsDropped[1].columnData; + struct regr_slope_data* data = (struct regr_slope_data*)context->getUserData()->data; + + double valx = 0.0; + double valy = 0.0; + + valx = convertAnyTo(valIn_x); + valy = convertAnyTo(valIn_y); + + // For decimal types, we need to move the decimal point. + uint32_t scaley = valsDropped[0].scale; + + if (valy != 0 && scaley > 0) + { + valy /= pow(10.0, (double)scaley); + } + + data->sumy -= valy; + + // For decimal types, we need to move the decimal point. 
+ uint32_t scalex = valsDropped[1].scale; + + if (valx != 0 && scalex > 0) + { + valx /= pow(10.0, (double)scaley); + } + + data->sumx -= valx; + data->sumx2 -= valx*valx; + + data->sumxy -= valx*valy; + --data->cnt; + + return mcsv1_UDAF::SUCCESS; +} + diff --git a/utils/regr/regr_slope.h b/utils/regr/regr_slope.h new file mode 100644 index 000000000..9c148d895 --- /dev/null +++ b/utils/regr/regr_slope.h @@ -0,0 +1,88 @@ +/* Copyright (C) 2017 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +/*********************************************************************** +* $Id$ +* +* regr_slope.h +***********************************************************************/ + +/** + * Columnstore interface for for the regr_slope function + * + * + * CREATE AGGREGATE FUNCTION regr_slope returns REAL + * soname 'libregr_mysql.so'; + * + */ +#ifndef HEADER_regr_slope +#define HEADER_regr_slope + +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Return the regr_slope value of the dataset + +class regr_slope : public mcsv1_UDAF +{ +public: + // Defaults OK + regr_slope() : mcsv1_UDAF() {}; + virtual ~regr_slope() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_regr_slope.h + diff --git a/utils/regr/regrmysql.cpp b/utils/regr/regrmysql.cpp index 6870f3050..3b048f14d 100644 --- a/utils/regr/regrmysql.cpp +++ b/utils/regr/regrmysql.cpp @@ -199,7 +199,7 @@ extern "C" #ifdef _MSC_VER __declspec(dllexport) #endif - long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + double regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), char* is_null, char* error __attribute__((unused))) { struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr; 
@@ -283,7 +283,7 @@ extern "C" #ifdef _MSC_VER __declspec(dllexport) #endif - long long regr_avgy(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + double regr_avgy(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), char* is_null, char* error __attribute__((unused))) { struct regr_avgy_data* data = (struct regr_avgy_data*)initid->ptr; @@ -368,7 +368,225 @@ extern "C" struct regr_count_data* data = (struct regr_count_data*)initid->ptr; return data->cnt; } + //======================================================================= + + /** + * regr_slope connector stub + */ + struct regr_slope_data + { + int64_t cnt; + double sumx; + double sumx2; // sum of (x squared) + double sumy; + double sumxy; // sum of (x*y) + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_slope_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_slope_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_slope() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_slope_data*) malloc(sizeof(struct regr_slope_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + data->sumy = 0.0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_slope_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_slope_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_slope_data* data = (struct regr_slope_data*)initid->ptr; + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + data->sumy = 0.0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_slope_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + 
{ + return; + } + struct regr_slope_data* data = (struct regr_slope_data*)initid->ptr; + double yval = cvtArgToDouble(args->arg_type[0], args->args[0]); + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + data->sumy += yval; + data->sumx += xval; + data->sumx2 += xval*xval; + data->sumxy += xval*yval; + ++data->cnt; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + double regr_slope(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_slope_data* data = (struct regr_slope_data*)initid->ptr; + double N = data->cnt; + if (N > 0) + { + double sumx = data->sumx; + double sumy = data->sumy; + double sumx2 = data->sumx2; + double sumxy = data->sumxy; + double variance = (N * sumx2) - (sumx * sumx); + if (variance) + { + return ((N * sumxy) - (sumx * sumy)) / variance; + } + } + *is_null = 1; + return 0; + } + +//======================================================================= + + /** + * regr_intercept connector stub + */ + struct regr_intercept_data + { + int64_t cnt; + double sumx; + double sumx2; // sum of (x squared) + double sumy; + double sumxy; // sum of (x*y) + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_intercept_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_intercept_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_intercept() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_intercept_data*) malloc(sizeof(struct regr_intercept_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + data->sumy = 0.0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_intercept_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_intercept_clear(UDF_INIT* initid, char* is_null 
__attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_intercept_data* data = (struct regr_intercept_data*)initid->ptr; + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + data->sumy = 0.0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_intercept_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct regr_intercept_data* data = (struct regr_intercept_data*)initid->ptr; + double yval = cvtArgToDouble(args->arg_type[0], args->args[0]); + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + data->sumy += yval; + data->sumx += xval; + data->sumx2 += xval*xval; + data->sumxy += xval*yval; + ++data->cnt; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + double regr_intercept(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_intercept_data* data = (struct regr_intercept_data*)initid->ptr; + double N = data->cnt; + if (N > 0) + { + double sumx = data->sumx; + double sumy = data->sumy; + double sumx2 = data->sumx2; + double sumxy = data->sumxy; + double variance = (N * sumx2) - (sumx * sumx); + if (variance) + { + double slope = ((N * sumxy) - (sumx * sumy)) / variance; + return (sumy - (slope * sumx)) / N; + } + } + *is_null = 1; + return 0; + } } // vim:ts=4 sw=4: diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index b593239cd..e039d5c2a 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -242,7 +242,7 @@ struct RowUDAFFunctionCol : public RowAggFunctionCol mcsv1sdk::mcsv1Context fUDAFContext; // The UDAF context bool bInterrupted; // Shared by all the threads -}; + }; inline void RowAggFunctionCol::serialize(messageqcpp::ByteStream& bs) const { diff --git a/utils/udfsdk/mcsv1_udaf.h 
b/utils/udfsdk/mcsv1_udaf.h index 073b5164a..ec0d0cb79 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -381,6 +381,7 @@ private: std::string functionName; mcsv1sdk::mcsv1_UDAF* func; int32_t fParamCount; + std::vector paramKeys; public: // For use by the framework @@ -403,6 +404,7 @@ public: EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const; EXPORT boost::shared_ptr getUserDataSP(); EXPORT void setParamCount(int32_t paramCount); + std::vector* getParamKeys(); }; // Since aggregate functions can operate on any data type, we use the following structure @@ -606,6 +608,9 @@ public: virtual ReturnCode createUserData(UserData*& userdata, int32_t& length); protected: + // some handy conversion routines + template + T convertAnyTo(static_any::any&); // These are handy for testing the actual type of static_any static const static_any::any& charTypeId; static const static_any::any& scharTypeId; @@ -948,6 +953,11 @@ inline void mcsv1Context::setParamCount(int32_t paramCount) fParamCount = paramCount; } +inline std::vector* mcsv1Context::getParamKeys() +{ + return ¶mKeys; +} + inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { return NOT_IMPLEMENTED; @@ -960,6 +970,68 @@ inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::createUserData(UserData*& userData, in return SUCCESS; } + + +// Handy helper functions +template +inline T mcsv1_UDAF::convertAnyTo(static_any::any& valIn) +{ + T val; + if (valIn.compatible(longTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(charTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(scharTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(shortTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(intTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(llTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ucharTypeId)) + { + val = valIn.cast(); + } + else if 
(valIn.compatible(ushortTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(uintTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ulongTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(ullTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(floatTypeId)) + { + val = valIn.cast(); + } + else if (valIn.compatible(doubleTypeId)) + { + val = valIn.cast(); + } + return val; +} + }; // namespace mcssdk #undef EXPORT From 94dfacfe2590e2a4524959e9b309402bf592b66b Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 28 Sep 2018 07:21:49 +0100 Subject: [PATCH 21/32] MCOL-1750 Fix threadpool stack leaks When a thread has been idle for 10 minutes and we have too many threads in the threadpool the thread will be pruned. This is done by the thread's main function just returning. Unfortunately this does not free up the memory, the thread either needs to be joined or detached. We cannot use detached threads since there are mutexes and conditional variables between the main thread and the threadpool threads. If the main thread finishes before the threadpool threads (as would happen in cpimport) then crashes occur. The parent needs to wait on the child threads which is the whole point in joining. So this fix spawns a new thread which every minute will check the list of threads to be joined due to timeout and join them. We have had to use an adapted version of boost::thread_group so that we can join a single thread based off its thread ID. In addition we have modified PriorityThreadPool to use detached threads since this does not need to signal the child threads at the end.
--- utils/threadpool/prioritythreadpool.cpp | 27 ++++-- utils/threadpool/threadpool.cpp | 42 ++++++++- utils/threadpool/threadpool.h | 108 +++++++++++++++++++++++- 3 files changed, 168 insertions(+), 9 deletions(-) diff --git a/utils/threadpool/prioritythreadpool.cpp b/utils/threadpool/prioritythreadpool.cpp index 4d19df91e..a5c713eab 100644 --- a/utils/threadpool/prioritythreadpool.cpp +++ b/utils/threadpool/prioritythreadpool.cpp @@ -42,12 +42,22 @@ PriorityThreadPool::PriorityThreadPool(uint targetWeightPerRun, uint highThreads uint midThreads, uint lowThreads, uint ID) : _stop(false), weightPerRun(targetWeightPerRun), id(ID) { + boost::thread* newThread; for (uint32_t i = 0; i < highThreads; i++) - threads.create_thread(ThreadHelper(this, HIGH)); + { + newThread = threads.create_thread(ThreadHelper(this, HIGH)); + newThread->detach(); + } for (uint32_t i = 0; i < midThreads; i++) - threads.create_thread(ThreadHelper(this, MEDIUM)); + { + newThread = threads.create_thread(ThreadHelper(this, MEDIUM)); + newThread->detach(); + } for (uint32_t i = 0; i < lowThreads; i++) - threads.create_thread(ThreadHelper(this, LOW)); + { + newThread = threads.create_thread(ThreadHelper(this, LOW)); + newThread->detach(); + } cout << "started " << highThreads << " high, " << midThreads << " med, " << lowThreads << " low.\n"; defaultThreadCounts[HIGH] = threadCounts[HIGH] = highThreads; @@ -62,6 +72,7 @@ PriorityThreadPool::~PriorityThreadPool() void PriorityThreadPool::addJob(const Job &job, bool useLock) { + boost::thread* newThread; mutex::scoped_lock lk(mutex, defer_lock_t()); if (useLock) @@ -70,17 +81,20 @@ void PriorityThreadPool::addJob(const Job &job, bool useLock) // Create any missing threads if (defaultThreadCounts[HIGH] != threadCounts[HIGH]) { - threads.create_thread(ThreadHelper(this, HIGH)); + newThread = threads.create_thread(ThreadHelper(this, HIGH)); + newThread->detach(); threadCounts[HIGH]++; } if (defaultThreadCounts[MEDIUM] != threadCounts[MEDIUM]) { - 
threads.create_thread(ThreadHelper(this, MEDIUM)); + newThread = threads.create_thread(ThreadHelper(this, MEDIUM)); + newThread->detach(); threadCounts[MEDIUM]++; } if (defaultThreadCounts[LOW] != threadCounts[LOW]) { - threads.create_thread(ThreadHelper(this, LOW)); + newThread = threads.create_thread(ThreadHelper(this, LOW)); + newThread->detach(); threadCounts[LOW]++; } @@ -261,7 +275,6 @@ void PriorityThreadPool::sendErrorMsg(uint32_t id, uint32_t step, primitiveproce void PriorityThreadPool::stop() { _stop = true; - threads.join_all(); } } // namespace threadpool diff --git a/utils/threadpool/threadpool.cpp b/utils/threadpool/threadpool.cpp index d903f9892..0b1546e98 100644 --- a/utils/threadpool/threadpool.cpp +++ b/utils/threadpool/threadpool.cpp @@ -43,7 +43,8 @@ ThreadPool::ThreadPool() } ThreadPool::ThreadPool( size_t maxThreads, size_t queueSize ) - :fMaxThreads( maxThreads ), fQueueSize( queueSize ) + :fMaxThreads( maxThreads ), fQueueSize( queueSize ), + fPruneThread( NULL ) { init(); } @@ -72,6 +73,7 @@ void ThreadPool::init() fStop = false; fNextFunctor = fWaitingFunctors.end(); fNextHandle=1; + fPruneThread = new boost::thread(boost::bind(&ThreadPool::pruneThread, this)); } void ThreadPool::setQueueSize(size_t queueSize) @@ -80,6 +82,39 @@ void ThreadPool::setQueueSize(size_t queueSize) fQueueSize = queueSize; } +void ThreadPool::pruneThread() +{ + boost::mutex::scoped_lock lock2(fPruneMutex); + + while(true) + { + boost::system_time timeout = boost::get_system_time() + boost::posix_time::minutes(1); + if (!fPruneThreadEnd.timed_wait(fPruneMutex, timeout)) + { + while(!fPruneThreads.empty()) + { + if (fDebug) + { + ostringstream oss; + oss << "pruning thread " << fPruneThreads.top(); + logging::Message::Args args; + logging::Message message(0); + args.add(oss.str()); + message.format( args ); + logging::LoggingID lid(22); + logging::MessageLog ml(lid); + ml.logWarningMessage( message ); + } + fThreads.join_one(fPruneThreads.top()); + 
fPruneThreads.pop(); + } + } + else + { + break; + } + } +} void ThreadPool::setMaxThreads(size_t maxThreads) { @@ -93,6 +128,9 @@ void ThreadPool::stop() fStop = true; lock1.unlock(); + fPruneThreadEnd.notify_all(); + fPruneThread->join(); + delete fPruneThread; fNeedThread.notify_all(); fThreads.join_all(); } @@ -293,6 +331,8 @@ void ThreadPool::beginThread() throw() { if (fThreadCount > fMaxThreads) { + boost::mutex::scoped_lock lock2(fPruneMutex); + fPruneThreads.push(boost::this_thread::get_id()); --fThreadCount; return; } diff --git a/utils/threadpool/threadpool.h b/utils/threadpool/threadpool.h index f11bb4b2b..95f7d4c7e 100644 --- a/utils/threadpool/threadpool.h +++ b/utils/threadpool/threadpool.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,106 @@ namespace threadpool { + +// Taken from boost::thread_group and adapted +class ThreadPoolGroup +{ +private: + ThreadPoolGroup(ThreadPoolGroup const&); + ThreadPoolGroup& operator=(ThreadPoolGroup const&); +public: + ThreadPoolGroup() {} + ~ThreadPoolGroup() + { + for(std::list::iterator it=threads.begin(),end=threads.end(); + it!=end; + ++it) + { + delete *it; + } + } + + template + boost::thread* create_thread(F threadfunc) + { + boost::lock_guard guard(m); + std::unique_ptr new_thread(new boost::thread(threadfunc)); + threads.push_back(new_thread.get()); + return new_thread.release(); + } + + void add_thread(boost::thread* thrd) + { + if(thrd) + { + boost::lock_guard guard(m); + threads.push_back(thrd); + } + } + + void remove_thread(boost::thread* thrd) + { + boost::lock_guard guard(m); + std::list::iterator const it=std::find(threads.begin(),threads.end(),thrd); + if(it!=threads.end()) + { + threads.erase(it); + } + } + + void join_all() + { + boost::shared_lock guard(m); + + for(std::list::iterator it=threads.begin(),end=threads.end(); + it!=end; + ++it) + { + (*it)->join(); + } + } + + void interrupt_all() + { + boost::shared_lock guard(m); + + 
for(std::list::iterator it=threads.begin(),end=threads.end(); + it!=end; + ++it) + { + (*it)->interrupt(); + } + } + + size_t size() const + { + boost::shared_lock guard(m); + return threads.size(); + } + + void join_one(boost::thread::id id) + { + boost::shared_lock guard(m); + for(std::list::iterator it=threads.begin(),end=threads.end(); + it!=end; + ++it) + { + if ((*it)->get_id() == id) + { + (*it)->join(); + threads.erase(it); + return; + } + } + + } + +private: + std::list threads; + mutable boost::shared_mutex m; +}; + + /** @brief ThreadPool is a component for working with pools of threads and asynchronously * executing tasks. It is responsible for creating threads and tracking which threads are "busy" * and which are idle. Idle threads are utilized as "work" is added to the system. @@ -183,6 +284,7 @@ private: */ void beginThread() throw(); + void pruneThread(); ThreadPool(const ThreadPool&); ThreadPool& operator = (const ThreadPool&); @@ -221,7 +323,7 @@ private: boost::mutex fMutex; boost::condition fThreadAvailable; // triggered when a thread is available boost::condition fNeedThread; // triggered when a thread is needed - boost::thread_group fThreads; + ThreadPoolGroup fThreads; bool fStop; long fGeneralErrors; @@ -231,6 +333,10 @@ private: std::string fName; // Optional to add a name to the pool for debugging. 
bool fDebug; + boost::mutex fPruneMutex; + boost::condition fPruneThreadEnd; + boost::thread* fPruneThread; + std::stack fPruneThreads; // A list of stale thread IDs to be joined }; // This class, if instantiated, will continuously log details about the indicated threadpool From 5092b4fd13ba01f8e727184687a236e14a4f3a56 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 28 Sep 2018 07:55:06 +0100 Subject: [PATCH 22/32] MCOL-1750 unique_ptr doesn't work in all OSes --- utils/threadpool/threadpool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/threadpool/threadpool.h b/utils/threadpool/threadpool.h index 95f7d4c7e..1f0c6d0aa 100644 --- a/utils/threadpool/threadpool.h +++ b/utils/threadpool/threadpool.h @@ -75,7 +75,7 @@ public: boost::thread* create_thread(F threadfunc) { boost::lock_guard guard(m); - std::unique_ptr new_thread(new boost::thread(threadfunc)); + std::auto_ptr new_thread(new boost::thread(threadfunc)); threads.push_back(new_thread.get()); return new_thread.release(); } From dc9ba90f96fcfef2259c42e85b42f48a9ed65b90 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 28 Sep 2018 13:51:43 -0500 Subject: [PATCH 23/32] MCOL-521 add regr_intecept and regr_r2 --- dbcon/mysql/install_calpont_mysql.sh | 1 + utils/regr/CMakeLists.txt | 2 +- utils/regr/regr.vpj | 2 + utils/regr/regr_intercept.cpp | 197 ++++++++++++++++++++++++ utils/regr/regr_intercept.h | 88 +++++++++++ utils/regr/regr_r2.cpp | 216 ++++++++++++++++++++++++++ utils/regr/regr_r2.h | 88 +++++++++++ utils/regr/regr_slope.cpp | 11 +- utils/regr/regrmysql.cpp | 145 ++++++++++++++++- utils/udfsdk/distinct_count.cpp | 99 ++++++++++++ utils/udfsdk/distinct_count.h | 222 +++++++++++++++++++++++++++ 11 files changed, 1058 insertions(+), 13 deletions(-) create mode 100644 utils/regr/regr_intercept.cpp create mode 100644 utils/regr/regr_intercept.h create mode 100644 utils/regr/regr_r2.cpp create mode 100644 utils/regr/regr_r2.h create mode 100644 
utils/udfsdk/distinct_count.cpp create mode 100644 utils/udfsdk/distinct_count.h diff --git a/dbcon/mysql/install_calpont_mysql.sh b/dbcon/mysql/install_calpont_mysql.sh index 311e03784..a5d7150a2 100755 --- a/dbcon/mysql/install_calpont_mysql.sh +++ b/dbcon/mysql/install_calpont_mysql.sh @@ -89,6 +89,7 @@ CREATE AGGREGATE FUNCTION regr_avgy RETURNS REAL soname 'libregr_mysql.so'; CREATE AGGREGATE FUNCTION regr_count RETURNS INTEGER soname 'libregr_mysql.so'; CREATE AGGREGATE FUNCTION regr_slope RETURNS REAL soname 'libregr_mysql.so'; CREATE AGGREGATE FUNCTION regr_intercept RETURNS REAL soname 'libregr_mysql.so'; +CREATE AGGREGATE FUNCTION regr_r2 RETURNS REAL soname 'libregr_mysql.so'; CREATE AGGREGATE FUNCTION distinct_count RETURNS INTEGER soname 'libudf_mysql.so'; diff --git a/utils/regr/CMakeLists.txt b/utils/regr/CMakeLists.txt index 47db83c63..16f44d9af 100755 --- a/utils/regr/CMakeLists.txt +++ b/utils/regr/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(regr_LIB_SRCS regr_avgx.cpp regr_avgy.cpp regr_count.cpp regr_slope.cpp regr_intercept) +set(regr_LIB_SRCS regr_avgx.cpp regr_avgy.cpp regr_count.cpp regr_slope.cpp regr_intercept regr_r2) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/regr/regr.vpj b/utils/regr/regr.vpj index a22c54ced..0de8c7282 100644 --- a/utils/regr/regr.vpj +++ b/utils/regr/regr.vpj @@ -198,6 +198,7 @@ + @@ -208,6 +209,7 @@ + +#include +#include +#include "regr_intercept.h" +#include "bytestream.h" +#include "objectreader.h" + +using namespace mcsv1sdk; + +class Add_regr_intercept_ToUDAFMap +{ +public: + Add_regr_intercept_ToUDAFMap() + { + UDAFMap::getMap()["regr_intercept"] = new regr_intercept(); + } +}; + +static Add_regr_intercept_ToUDAFMap addToMap; + +// Use the simple data model +struct regr_intercept_data +{ + uint64_t cnt; + double sumx; + double sumx2; // sum of (x squared) + double sumy; + double sumxy; // sum of (x*y) +}; + + 
+mcsv1_UDAF::ReturnCode regr_intercept::init(mcsv1Context* context,
+                                            ColumnDatum* colTypes)
+{
+    if (context->getParameterCount() != 2)
+    {
+        // The error message will be prepended with
+        // "The storage engine for the table doesn't support "
+        context->setErrorMessage("regr_intercept() with other than 2 arguments");
+        return mcsv1_UDAF::ERROR;
+    }
+
+    context->setUserDataSize(sizeof(regr_intercept_data));  // per-group accumulator
+    context->setResultType(CalpontSystemCatalog::DOUBLE);
+    context->setColWidth(8);
+    context->setScale(colTypes[0].scale + 8);
+    context->setPrecision(19);
+    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
+    return mcsv1_UDAF::SUCCESS;
+
+}
+
+mcsv1_UDAF::ReturnCode regr_intercept::reset(mcsv1Context* context)
+{
+    struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data;
+    data->cnt = 0;
+    data->sumx = 0.0;
+    data->sumx2 = 0.0;
+    data->sumy = 0.0;
+    data->sumxy = 0.0;
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_intercept::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
+{
+    static_any::any& valIn_y = valsIn[0].columnData;  // argument 0 is y
+    static_any::any& valIn_x = valsIn[1].columnData;  // argument 1 is x
+    struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data;
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsIn[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy += valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsIn[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex);  // x is rescaled by its own scale, not y's
+    }
+
+    data->sumx += valx;
+    data->sumx2 += valx*valx;
+
+    data->sumxy += valx*valy;
+    ++data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_intercept::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
+{
+    if (!userDataIn)
+    {
+        return mcsv1_UDAF::SUCCESS;  // nothing to merge
+    }
+
+    struct regr_intercept_data* outData = (struct regr_intercept_data*)context->getUserData()->data;
+    struct regr_intercept_data* inData = (struct regr_intercept_data*)userDataIn->data;
+
+    outData->sumx += inData->sumx;
+    outData->sumx2 += inData->sumx2;
+    outData->sumy += inData->sumy;
+    outData->sumxy += inData->sumxy;
+    outData->cnt += inData->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_intercept::evaluate(mcsv1Context* context, static_any::any& valOut)
+{
+    struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data;
+    double N = data->cnt;
+    if (N > 0)
+    {
+        double sumx = data->sumx;
+        double sumy = data->sumy;
+        double sumx2 = data->sumx2;
+        double sumxy = data->sumxy;
+        double slope = 0.0;
+        double variance = (N * sumx2) - (sumx * sumx);
+        if (variance != 0)  // otherwise valOut is left unassigned
+        {
+            slope = ((N * sumxy) - (sumx * sumy)) / variance;
+            valOut = (sumy - (slope * sumx)) / N;  // intercept = (sum(y) - slope * sum(x)) / N
+        }
+    }
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_intercept::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
+{
+    static_any::any& valIn_y = valsDropped[0].columnData;
+    static_any::any& valIn_x = valsDropped[1].columnData;
+    struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data;
+
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsDropped[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy -= valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsDropped[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex);  // must mirror nextValue() exactly so the sums cancel
+    }
+
+    data->sumx -= valx;
+    data->sumx2 -= valx*valx;
+
+    data->sumxy -= valx*valy;
+    --data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
diff --git a/utils/regr/regr_intercept.h b/utils/regr/regr_intercept.h
new file mode 100644
index 000000000..ed82477cd
--- /dev/null
+++ b/utils/regr/regr_intercept.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2017 MariaDB Corporaton
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.
*/
+
+/***********************************************************************
+*   $Id$
+*
+*   regr_intercept.h
+***********************************************************************/
+
+/**
+ * Columnstore interface for the regr_intercept function
+ *
+ *
+ *    CREATE AGGREGATE FUNCTION regr_intercept returns REAL
+ *    soname 'libregr_mysql.so';
+ *
+ */
+#ifndef HEADER_regr_intercept
+#define HEADER_regr_intercept
+
+#include
+#include
+#include
+#ifdef _MSC_VER
+#include
+#else
+#include
+#endif
+
+#include "mcsv1_udaf.h"
+#include "calpontsystemcatalog.h"
+#include "windowfunctioncolumn.h"
+using namespace execplan;
+
+#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+namespace mcsv1sdk
+{
+
+// Return the regr_intercept value of the dataset (argument 0 is y, argument 1 is x)
+
+class regr_intercept : public  mcsv1_UDAF
+{
+public:
+    // Defaults OK
+    regr_intercept() : mcsv1_UDAF() {};
+    virtual ~regr_intercept() {};
+
+    virtual ReturnCode init(mcsv1Context* context,
+                            ColumnDatum* colTypes);  // requires exactly two parameters
+
+    virtual ReturnCode reset(mcsv1Context* context);  // zeroes the running sums
+
+    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);  // adds one (y, x) row
+
+    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);  // merges partial sums
+
+    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);  // valOut stays unset when no result
+
+    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);  // removes one (y, x) row
+
+protected:
+};
+
+};  // namespace
+
+#undef EXPORT
+
+#endif // HEADER_regr_intercept
+
diff --git a/utils/regr/regr_r2.cpp b/utils/regr/regr_r2.cpp
new file mode 100644
index 000000000..052b5dcfc
--- /dev/null
+++ b/utils/regr/regr_r2.cpp
@@ -0,0 +1,216 @@
+/* Copyright (C) 2017 MariaDB Corporaton
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+#include
+#include
+#include
+#include "regr_r2.h"
+#include "bytestream.h"
+#include "objectreader.h"
+
+using namespace mcsv1sdk;
+
+class Add_regr_r2_ToUDAFMap
+{
+public:
+    Add_regr_r2_ToUDAFMap()
+    {
+        UDAFMap::getMap()["regr_r2"] = new regr_r2();
+    }
+};
+
+static Add_regr_r2_ToUDAFMap addToMap;  // registers regr_r2 during static initialisation
+
+// Use the simple data model
+struct regr_r2_data
+{
+    uint64_t    cnt;
+    double      sumx;
+    double      sumx2;  // sum of (x squared)
+    double      sumy;
+    double      sumy2;  // sum of (y squared)
+    double      sumxy;  // sum of x * y
+};
+
+
+mcsv1_UDAF::ReturnCode regr_r2::init(mcsv1Context* context,
+                                     ColumnDatum* colTypes)
+{
+    if (context->getParameterCount() != 2)
+    {
+        // The error message will be prepended with
+        // "The storage engine for the table doesn't support "
+        context->setErrorMessage("regr_r2() with other than 2 arguments");
+        return mcsv1_UDAF::ERROR;
+    }
+
+    context->setUserDataSize(sizeof(regr_r2_data));  // per-group accumulator
+    context->setResultType(CalpontSystemCatalog::DOUBLE);
+    context->setColWidth(8);
+    context->setScale(colTypes[0].scale + 8);
+    context->setPrecision(19);
+    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
+    return mcsv1_UDAF::SUCCESS;
+
+}
+
+mcsv1_UDAF::ReturnCode regr_r2::reset(mcsv1Context* context)
+{
+    struct  regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;
+    data->cnt = 0;
+    data->sumx = 0.0;
+    data->sumx2 = 0.0;
+    data->sumy = 0.0;
+    data->sumy2 = 0.0;
+    data->sumxy = 0.0;
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_r2::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
+{
+    static_any::any& valIn_y = valsIn[0].columnData;  // argument 0 is y
+    static_any::any& valIn_x = valsIn[1].columnData;  // argument 1 is x
+    struct  regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsIn[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy += valy;
+    data->sumy2 += valy*valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsIn[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex);  // x is rescaled by its own scale, not y's
+    }
+
+    data->sumx += valx;
+    data->sumx2 += valx*valx;
+
+    data->sumxy += valx*valy;
+
+    ++data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_r2::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
+{
+    if (!userDataIn)
+    {
+        return mcsv1_UDAF::SUCCESS;  // nothing to merge
+    }
+
+    struct regr_r2_data* outData = (struct regr_r2_data*)context->getUserData()->data;
+    struct regr_r2_data* inData = (struct regr_r2_data*)userDataIn->data;
+
+    outData->sumx += inData->sumx;
+    outData->sumx2 += inData->sumx2;
+    outData->sumy += inData->sumy;
+    outData->sumy2 += inData->sumy2;
+    outData->sumxy += inData->sumxy;
+    outData->cnt += inData->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_r2::evaluate(mcsv1Context* context, static_any::any& valOut)
+{
+    struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;
+    double N = data->cnt;
+    if (N > 0)
+    {
+        double sumx = data->sumx;
+        double sumy = data->sumy;
+        double sumx2 = data->sumx2;
+        double sumy2 = data->sumy2;
+        double sumxy = data->sumxy;
+
+        double var_popx = (sumx2 - (sumx * sumx / N)) / N;
+        if (var_popx == 0)
+        {
+            // When var_popx is 0, NULL is the result.
+            return mcsv1_UDAF::SUCCESS;
+        }
+        double var_popy = (sumy2 - (sumy * sumy / N)) / N;
+        if (var_popy == 0)
+        {
+            // When var_popy is 0, 1 is the result
+            valOut = 1.0;
+            return mcsv1_UDAF::SUCCESS;
+        }
+        double std_popx = sqrt(var_popx);
+        double std_popy = sqrt(var_popy);
+        double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
+        double corr = covar_pop / (std_popy * std_popx);
+        valOut = corr * corr;  // r^2 is the squared correlation coefficient
+    }
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode regr_r2::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
+{
+    static_any::any& valIn_y = valsDropped[0].columnData;
+    static_any::any& valIn_x = valsDropped[1].columnData;
+    struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;
+
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsDropped[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy -= valy;
+    data->sumy2 -= valy*valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsDropped[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex);  // must mirror nextValue() exactly so the sums cancel
+    }
+
+    data->sumx -= valx;
+    data->sumx2 -= valx*valx;
+
+    data->sumxy -= valx*valy;
+    --data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
diff --git a/utils/regr/regr_r2.h b/utils/regr/regr_r2.h
new file mode 100644
index 000000000..6ff65009a
--- /dev/null
+++ b/utils/regr/regr_r2.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2017 MariaDB Corporaton
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+/***********************************************************************
+*   $Id$
+*
+*   regr_r2.h
+***********************************************************************/
+
+/**
+ * Columnstore interface for the regr_r2 function
+ *
+ *
+ *    CREATE AGGREGATE FUNCTION regr_r2 returns REAL
+ *    soname 'libregr_mysql.so';
+ *
+ */
+#ifndef HEADER_regr_r2
+#define HEADER_regr_r2
+
+#include
+#include
+#include
+#ifdef _MSC_VER
+#include
+#else
+#include
+#endif
+
+#include "mcsv1_udaf.h"
+#include "calpontsystemcatalog.h"
+#include "windowfunctioncolumn.h"
+using namespace execplan;
+
+#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+namespace mcsv1sdk
+{
+
+// Return the regr_r2 value of the dataset (argument 0 is y, argument 1 is x)
+
+class regr_r2 : public  mcsv1_UDAF
+{
+public:
+    // Defaults OK
+    regr_r2() : mcsv1_UDAF() {};
+    virtual ~regr_r2() {};
+
+    virtual ReturnCode init(mcsv1Context* context,
+                            ColumnDatum* colTypes);  // requires exactly two parameters
+
+    virtual ReturnCode reset(mcsv1Context* context);  // zeroes the running sums
+
+    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);  // adds one (y, x) row
+
+    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);  // merges partial sums
+
+    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);  // valOut stays unset for NULL
+
+    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);  // removes one (y, x) row
+
+protected:
+};
+
+};  // namespace
+
+#undef EXPORT
+
+#endif // HEADER_regr_r2
+
diff --git a/utils/regr/regr_slope.cpp b/utils/regr/regr_slope.cpp
index c4178c56b..51f649046 100644
--- a/utils/regr/regr_slope.cpp
+++ b/utils/regr/regr_slope.cpp
@@ -60,11 +60,8 @@ mcsv1_UDAF::ReturnCode regr_slope::init(mcsv1Context* context,
context->setUserDataSize(sizeof(regr_slope_data)); context->setResultType(CalpontSystemCatalog::DOUBLE); context->setColWidth(8); - if (colTypes[0].scale) - { - context->setScale(colTypes[0].scale + 8); - context->setPrecision(19); - } + context->setScale(colTypes[0].scale + 8); + context->setPrecision(19); context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); return mcsv1_UDAF::SUCCESS; @@ -77,6 +74,7 @@ mcsv1_UDAF::ReturnCode regr_slope::reset(mcsv1Context* context) data->sumx = 0.0; data->sumx2 = 0.0; data->sumy = 0.0; + data->sumxy = 0.0; return mcsv1_UDAF::SUCCESS; } @@ -150,7 +148,8 @@ mcsv1_UDAF::ReturnCode regr_slope::evaluate(mcsv1Context* context, static_any::a double variance = (N * sumx2) - (sumx * sumx); if (variance != 0) { - valOut = ((N * sumxy) - (sumx * sumy)) / variance; + double slope = ((N * sumxy) - (sumx * sumy)) / variance; + valOut = slope; } } return mcsv1_UDAF::SUCCESS; diff --git a/utils/regr/regrmysql.cpp b/utils/regr/regrmysql.cpp index 3b048f14d..fce6bb440 100644 --- a/utils/regr/regrmysql.cpp +++ b/utils/regr/regrmysql.cpp @@ -125,7 +125,7 @@ extern "C" //======================================================================= /** - * regr_avgx connector stub + * regr_avgx */ struct regr_avgx_data { @@ -209,7 +209,7 @@ extern "C" //======================================================================= /** - * regr_avgy connector stub + * regr_avgy */ struct regr_avgy_data { @@ -293,7 +293,7 @@ extern "C" //======================================================================= /** - * regr_count connector stub + * regr_count */ struct regr_count_data { @@ -372,11 +372,11 @@ extern "C" //======================================================================= /** - * regr_slope connector stub + * regr_slope */ struct regr_slope_data { - int64_t cnt; + int64_t cnt; double sumx; double sumx2; // sum of (x squared) double sumy; @@ -404,6 +404,7 @@ extern "C" data->sumx = 0.0; data->sumx2 = 0.0; data->sumy = 0.0; + data->sumxy = 0.0; 
initid->ptr = (char*)data;
 		return 0;
@@ -429,6 +430,7 @@ extern "C"
 		data->sumx = 0.0;
 		data->sumx2 = 0.0;
 		data->sumy = 0.0;
+		data->sumxy = 0.0;
 	}
 	#ifdef _MSC_VER
@@ -481,7 +483,7 @@ extern "C"
 //=======================================================================
 	/**
-	 * regr_intercept connector stub
+	 * regr_intercept
 	 */
 	struct regr_intercept_data
 	{
@@ -513,6 +515,7 @@ extern "C"
 		data->sumx = 0.0;
 		data->sumx2 = 0.0;
 		data->sumy = 0.0;
+		data->sumxy = 0.0;
 		initid->ptr = (char*)data;
 		return 0;
@@ -538,6 +541,7 @@ extern "C"
 		data->sumx = 0.0;
 		data->sumx2 = 0.0;
 		data->sumy = 0.0;
+		data->sumxy = 0.0;
 	}
 	#ifdef _MSC_VER
@@ -587,6 +591,135 @@ extern "C"
 		*is_null = 1;
 		return 0;
 	}
+
+//=======================================================================
+
+	/**
+	 * regr_r2 (argument 0 is y, argument 1 is x)
+	 */
+	struct regr_r2_data
+	{
+		int64_t	cnt;
+		double  sumx;
+		double  sumx2;  // sum of (x squared)
+		double  sumy;
+		double  sumy2;  // sum of (y squared)
+		double  sumxy;  // sum of (x*y)
+	};
+
+	#ifdef _MSC_VER
+	__declspec(dllexport)
+	#endif
+	my_bool regr_r2_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
+	{
+		struct regr_r2_data* data;
+		if (args->arg_count != 2)
+		{
+			strcpy(message,"regr_r2() requires two arguments");
+			return 1;
+		}
+
+		if (!(data = (struct regr_r2_data*) malloc(sizeof(struct regr_r2_data))))
+		{
+			strmov(message,"Couldn't allocate memory");
+			return 1;
+		}
+		data->cnt = 0;
+		data->sumx = 0.0;
+		data->sumx2 = 0.0;
+		data->sumy = 0.0;
+		data->sumy2 = 0.0;
+		data->sumxy = 0.0;
+
+		initid->ptr = (char*)data;  // released in regr_r2_deinit()
+		return 0;
+	}
+
+	#ifdef _MSC_VER
+	__declspec(dllexport)
+	#endif
+	void regr_r2_deinit(UDF_INIT* initid)
+	{
+		free(initid->ptr);
+	}
+
+	#ifdef _MSC_VER
+	__declspec(dllexport)
+	#endif
+	void
+	regr_r2_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
+	              char* message __attribute__((unused)))
+	{
+		struct regr_r2_data* data = (struct regr_r2_data*)initid->ptr;
+		data->cnt = 0;
+		data->sumx = 0.0;
+		data->sumx2 = 0.0;
+		data->sumy = 0.0;
+		data->sumy2 = 0.0;
+		data->sumxy = 0.0;
+	}
+
+	#ifdef _MSC_VER
+	__declspec(dllexport)
+	#endif
+	void
+	regr_r2_add(UDF_INIT* initid, UDF_ARGS* args,
+	            char* is_null,
+	            char* message __attribute__((unused)))
+	{
+		// Test for NULL in x and y; a row with either one NULL is skipped
+		if (args->args[0] == 0 || args->args[1] == 0)
+		{
+			return;
+		}
+		struct regr_r2_data* data = (struct regr_r2_data*)initid->ptr;
+		double yval = cvtArgToDouble(args->arg_type[0], args->args[0]);
+		double xval = cvtArgToDouble(args->arg_type[1], args->args[1]);
+		data->sumy += yval;
+		data->sumx += xval;
+		data->sumx2 += xval*xval;
+		data->sumy2 += yval*yval;
+		data->sumxy += xval*yval;
+		++data->cnt;
+	}
+
+	#ifdef _MSC_VER
+	__declspec(dllexport)
+	#endif
+	double regr_r2(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
+	               char* is_null, char* error __attribute__((unused)))
+	{
+		struct regr_r2_data* data = (struct regr_r2_data*)initid->ptr;
+		double N = data->cnt;
+		if (N > 0)
+		{
+			double sumx = data->sumx;
+			double sumy = data->sumy;
+			double sumx2 = data->sumx2;
+			double sumy2 = data->sumy2;
+			double sumxy = data->sumxy;
+			double var_popx = (sumx2 - (sumx * sumx / N)) / N;
+			if (var_popx == 0)
+			{
+				// When var_popx is 0, NULL is the result.
+				*is_null = 1;
+				return 0;
+			}
+			double var_popy = (sumy2 - (sumy * sumy / N)) / N;
+			if (var_popy == 0)
+			{
+				// When var_popy is 0, 1 is the result
+				return 1;
+			}
+			double std_popx = sqrt(var_popx);
+			double std_popy = sqrt(var_popy);
+			double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
+			double corr = covar_pop / (std_popy * std_popx);
+			return corr * corr;  // r^2 is the squared correlation coefficient
+		}
+		*is_null = 1;  // no rows accumulated: NULL
+		return 0;
+	}
 }
 // vim:ts=4 sw=4:
diff --git a/utils/udfsdk/distinct_count.cpp b/utils/udfsdk/distinct_count.cpp
new file mode 100644
index 000000000..66dcea18f
--- /dev/null
+++ b/utils/udfsdk/distinct_count.cpp
@@ -0,0 +1,99 @@
+/* Copyright (C) 2017 MariaDB Corporaton
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.
*/
+
+#include "distinct_count.h"
+
+using namespace mcsv1sdk;
+
+struct distinct_count_data
+{
+    uint64_t cnt;  // number of values counted so far
+};
+
+#define OUT_TYPE int64_t
+mcsv1_UDAF::ReturnCode distinct_count::init(mcsv1Context* context,
+                                            ColumnDatum* colTypes)
+{
+    context->setUserDataSize(sizeof(distinct_count_data));
+    if (context->getParameterCount() != 1)
+    {
+        // The error message will be prepended with
+        // "The storage engine for the table doesn't support "
+        context->setErrorMessage("distinct_count() with other than 1 arguments");
+        return mcsv1_UDAF::ERROR;
+    }
+    context->setResultType(CalpontSystemCatalog::BIGINT);
+    context->setColWidth(8);
+    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
+    context->setRunFlag(mcsv1sdk::UDAF_DISTINCT);
+    context->setRunFlag(mcsv1sdk::UDAF_OVER_REQUIRED);
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode distinct_count::reset(mcsv1Context* context)
+{
+    struct distinct_count_data* data = (struct distinct_count_data*)context->getUserData()->data;
+    data->cnt = 0;
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode distinct_count::nextValue(mcsv1Context* context,
+                                                 ColumnDatum* valsIn)
+{
+    static_any::any& valIn = valsIn[0].columnData;
+    struct distinct_count_data* data = (struct distinct_count_data*)context->getUserData()->data;
+
+    if (valIn.empty())
+    {
+        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
+    }
+    data->cnt++;
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode distinct_count::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
+{
+    struct distinct_count_data* outData = (struct distinct_count_data*)context->getUserData()->data;
+    struct distinct_count_data* inData = (struct distinct_count_data*)userDataIn->data;
+    outData->cnt += inData->cnt;  // merge a partial count into this context
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode distinct_count::evaluate(mcsv1Context* context, static_any::any& valOut)
+{
+    struct distinct_count_data* data = (struct distinct_count_data*)context->getUserData()->data;
+    valOut = data->cnt;
+    return mcsv1_UDAF::SUCCESS;
+}
+
+mcsv1_UDAF::ReturnCode distinct_count::dropValue(mcsv1Context* context,
+                                                 ColumnDatum* valsDropped)
+{
+    static_any::any& valIn = valsDropped[0].columnData;
+    struct distinct_count_data* data = (struct distinct_count_data*)context->getUserData()->data;
+
+    if (valIn.empty())
+    {
+        return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
+    }
+
+    data->cnt--;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
diff --git a/utils/udfsdk/distinct_count.h b/utils/udfsdk/distinct_count.h
new file mode 100644
index 000000000..1d804eaa8
--- /dev/null
+++ b/utils/udfsdk/distinct_count.h
@@ -0,0 +1,222 @@
+/* Copyright (C) 2017 MariaDB Corporaton
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.
*/ + +/*********************************************************************** +* $Id$ +* +* mcsv1_UDAF.h +***********************************************************************/ + +/** + * Columnstore interface for writing a User Defined Aggregate + * Functions (UDAF) and User Defined Analytic Functions (UDAnF) + * or a function that can act as either - UDA(n)F + * + * The basic steps are: + * + * 1. Create a the UDA(n)F function interface in some .h file. + * 2. Create the UDF function implementation in some .cpp file + * 3. Create the connector stub (MariaDB UDAF definition) for + * this UDF function. + * 4. build the dynamic library using all of the source. + * 5 Put the library in $COLUMNSTORE_INSTALL/lib of + * all modules + * 6. restart the Columnstore system. + * 7. notify mysqld about the new functions with commands like: + * + * CREATE AGGREGATE FUNCTION distinct_count returns INT + * soname 'libudf_mysql.so'; + * + */ +#ifndef HEADER_distinct_count +#define HEADER_distinct_count + +#include +#include +#include + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or +// User Defined Analytic Function (UDAnF). +// These will be singleton classes, so don't put any instance +// specific data in here. All instance data is stored in mcsv1Context +// passed to each user function and retrieved by the getUserData() method. +// +// Each API function returns a ReturnCode. If ERROR is returned at any time, +// the query is aborted, getInterrupted() will begin to return true and the +// message set in config->setErrorMessage() is returned to MariaDB. 
+class distinct_count : public mcsv1_UDAF +{ +public: + // Defaults OK + distinct_count() : mcsv1_UDAF(){}; + virtual ~distinct_count(){}; + + /** + * init() + * + * Mandatory. Implement this to initialize flags and instance + * data. Called once per SQL statement. You can do any sanity + * checks here. + * + * colTypes (in) - A vector of ColDataType defining the + * parameters of the UDA(n)F call. These can be used to decide + * to override the default return type. If desired, the new + * return type can be set by context->setReturnType() and + * decimal precision can be set in context-> + * setResultDecimalCharacteristics. + * + * Return mcsv1_UDAF::ERROR on any error, such as non-compatible + * colTypes or wrong number of arguments. Else return + * mcsv1_UDAF::SUCCESS. + */ + virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); + + /** + * reset() + * + * Mandatory. Reset the UDA(n)F for a new group, partition or, + * in some cases, new Window Frame. Do not free any memory + * allocated by context->setUserDataSize(). The SDK Framework owns + * that memory and will handle that. Use this opportunity to + * reset any variables in context->getUserData() needed for the + * next aggregation. May be called multiple times if running in + * a ditributed fashion. + * + * Use this opportunity to initialize the userData. + */ + virtual ReturnCode reset(mcsv1Context* context); + + /** + * nextValue() + * + * Mandatory. Handle a single row. + * + * colsIn - A vector of data structure describing the input + * data. + * + * This function is called once for every row in the filtered + * result set (before aggregation). It is very important that + * this function is efficient. + * + * If the UDAF is running in a distributed fashion, nextValue + * cannot depend on order, as it will only be called for each + * row found on the specific PM. + * + * valsIn (in) - a vector of the parameters from the row. 
+ */ + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + /** + * subEvaluate() + * + * Mandatory -- Called if the UDAF is running in a distributed + * fashion. Columnstore tries to run all aggregate functions + * distributed, depending on context. + * + * Perform an aggregation on rows partially aggregated by + * nextValue. Columnstore calls nextValue for each row on a + * given PM for a group (GROUP BY). subEvaluate is called on the + * UM to consolidate those values into a single instance of + * userData. Keep your aggregated totals in context's userData. + * The first time this is called for a group, reset() would have + * been called with this version of userData. + * + * Called for every partial data set in each group in GROUP BY. + * + * When subEvaluate has been called for all subAggregated data + * sets, Evaluate will be called with the same context as here. + * + * valIn (In) - This is a pointer to a memory block of the size + * set in setUserDataSize. It will contain the value of userData + * as seen in the last call to NextValue for a given PM. + * + */ + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn); + + /** + * evaluate() + * + * Mandatory. Get the aggregated value. + * + * Called for every new group if UDAF GROUP BY, UDAnF partition + * or, in some cases, new Window Frame. + * + * Set the aggregated value into valOut. The datatype is assumed + * to be the same as that set in the init() function; + * + * If the UDAF is running in a distributed fashion, evaluate is + * called after a series of subEvaluate calls. + * + * valOut (out) - Set the aggregated value here. The datatype is + * assumed to be the same as that set in the init() function; + * + * To return a NULL value, don't assign to valOut.
+ */ + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + /** + * dropValue() + * + * Optional -- If defined, the server will call this instead of + * reset for UDAnF. + * + * Don't implement if a UDAnF has one or more of the following: + * The UDAnF can't be used with a Window Frame + * The UDAnF is not reversable in some way + * The UDAnF is not interested in optimal performance + * + * If not implemented, reset() followed by a series of + * nextValue() will be called for each movement of the Window + * Frame. + * + * If implemented, then each movement of the Window Frame will + * result in dropValue() being called for each row falling out + * of the Frame and nextValue() being called for each new row + * coming into the Frame. + * + * valsDropped (in) - a vector of the parameters from the row + * leaving the Frame + * + * dropValue() will not be called for unbounded/current row type + * frames, as those are already optimized. + */ + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: + +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_distinct_count.h + From dd99e420e01a2f1ea96865eef190c8e117235b9e Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 2 Oct 2018 11:05:50 -0500 Subject: [PATCH 24/32] MCOL-521 Remove "typename" from certain places as some compiles can't handle it --- utils/windowfunction/wf_udaf.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index b4951edea..f9e38d9a1 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -816,7 +816,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // If it doesn't insert, the original pair will be returned // in distinct.first and distinct.second will be a bool -- // true if newly inserted, false if a duplicate. 
- std::pair distinct; + std::pair distinct; distinct = fDistinctMap.insert(val); if (distinct.second == false) { @@ -850,7 +850,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) if (k == 0 && fDistinct) { std::pair val = make_pair(valIn, 1); - std::pair distinct; + std::pair distinct; distinct = fDistinctMap.insert(val); if (distinct.second == false) { @@ -889,7 +889,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) if (k == 0 && fDistinct) { std::pair val = make_pair(valIn, 1); - std::pair distinct; + std::pair distinct; distinct = fDistinctMap.insert(val); if (distinct.second == false) { @@ -922,7 +922,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) if (k == 0 && fDistinct) { std::pair val = make_pair(valIn, 1); - std::pair distinct; + std::pair distinct; distinct = fDistinctMap.insert(val); if (distinct.second == false) { @@ -955,7 +955,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) if (k == 0 && fDistinct) { std::pair val = make_pair(valIn, 1); - std::pair distinct; + std::pair distinct; distinct = fDistinctMap.insert(val); if (distinct.second == false) { @@ -991,7 +991,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) if (k == 0 && fDistinct) { std::pair val = make_pair(valIn, 1); - std::pair distinct; + std::pair distinct; distinct = fDistinctMap.insert(val); if (distinct.second == false) { From 3d7f4a31703a0eaf22b18be5097bc8f6781f422c Mon Sep 17 00:00:00 2001 From: Patrick LeBlanc Date: Wed, 3 Oct 2018 13:04:54 -0500 Subject: [PATCH 25/32] Squash commit of fix-skip-oam-init branch. Squashed commit of the following: commit faaee9141af019363cbe207e4cdbe01e01493d0f Author: Patrick LeBlanc Date: Wed Oct 3 13:03:02 2018 -0500 Commented a debugging printout. commit e47e784c53705463696916f5c8dae1c014732f77 Author: Patrick LeBlanc Date: Wed Oct 3 12:13:23 2018 -0500 Moved the SKIP_OAM_INIT check for cleanliness, added 'config.h' to other places that need it. 
commit 662604553538795f9a03e1167c7b44376349a56a Author: Patrick LeBlanc Date: Tue Oct 2 16:33:06 2018 -0500 WIP. First cut of excising OAM and root access from the dev process. This passes most tests; need to make sure that the tests that fail have nothing to do with this change. DMLProc is doing something that results in a sudo password prompt. Will obliterate that next. --- config.h.cmake | 3 +++ configureEngine.cmake | 4 +++- exemgr/main.cpp | 10 ++++++++++ oam/oamcpp/liboamcpp.cpp | 1 + oam/oamcpp/oamcache.cpp | 2 ++ .../redistribute/we_redistributecontrolthread.cpp | 1 + writeengine/splitter/we_sdhandler.cpp | 1 + 7 files changed, 21 insertions(+), 1 deletion(-) diff --git a/config.h.cmake b/config.h.cmake index e320eb82c..74d707b11 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -2,6 +2,9 @@ #ifndef TEST_CONFIG_H #define TEST_CONFIG_H +/* Define to 1 to let the system come up without using OAM */ +#cmakedefine SKIP_OAM_INIT 1 + /* Define to 1 if you have the `alarm' function. */ #cmakedefine HAVE_ALARM 1 diff --git a/configureEngine.cmake b/configureEngine.cmake index 0a299c146..a3ac9d3c1 100644 --- a/configureEngine.cmake +++ b/configureEngine.cmake @@ -716,7 +716,9 @@ IF (NOT INLINE) SET (inline "") ENDIF() - +IF($ENV{SKIP_OAM_INIT}) + SET(SKIP_OAM_INIT 1) +ENDIF() EXECUTE_PROCESS( COMMAND rm -f conftest.data conftest.file conftest.sym diff --git a/exemgr/main.cpp b/exemgr/main.cpp index 8742188a0..4e3b42347 100644 --- a/exemgr/main.cpp +++ b/exemgr/main.cpp @@ -63,6 +63,7 @@ using namespace std; #include using namespace boost; +#include "config.h" #include "configcpp.h" using namespace config; #include "messagequeue.h" @@ -100,6 +101,10 @@ using namespace querytele; #include "threadpool.h" #include "crashtrace.h" +#if defined(SKIP_OAM_INIT) +#include "dbrm.h" +#endif + namespace { @@ -1601,6 +1606,11 @@ int main(int argc, char* argv[]) { } } +#if defined(SKIP_OAM_INIT) + BRM::DBRM *dbrm = new BRM::DBRM(); + dbrm->setSystemQueryReady(true); + delete 
dbrm; +#endif threadpool::ThreadPool exeMgrThreadPool(serverThreads, 0); exeMgrThreadPool.setName("ExeMgrServer"); diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index d53dd66de..bba892b0d 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -56,6 +56,7 @@ #ifdef _MSC_VER #include "idbregistry.h" #endif +#include "config.h" #include "installdir.h" #include "dbrm.h" #include "sessionmanager.h" diff --git a/oam/oamcpp/oamcache.cpp b/oam/oamcpp/oamcache.cpp index 084f88157..d8035bbf3 100644 --- a/oam/oamcpp/oamcache.cpp +++ b/oam/oamcpp/oamcache.cpp @@ -34,6 +34,7 @@ using namespace boost; #include "exceptclasses.h" #include "configcpp.h" #include "installdir.h" +#include "config.h" namespace { @@ -180,6 +181,7 @@ void OamCache::checkReload() } } #else + pmToConnectionMap[*it] = i++; moduleIds.push_back(*it); #endif it++; diff --git a/writeengine/redistribute/we_redistributecontrolthread.cpp b/writeengine/redistribute/we_redistributecontrolthread.cpp index ab2897834..70b5d1e2d 100644 --- a/writeengine/redistribute/we_redistributecontrolthread.cpp +++ b/writeengine/redistribute/we_redistributecontrolthread.cpp @@ -37,6 +37,7 @@ using namespace std; #include "boost/filesystem/operations.hpp" using namespace boost; +#include "config.h" #include "installdir.h" #include "configcpp.h" diff --git a/writeengine/splitter/we_sdhandler.cpp b/writeengine/splitter/we_sdhandler.cpp index 61fe45239..647a90e89 100644 --- a/writeengine/splitter/we_sdhandler.cpp +++ b/writeengine/splitter/we_sdhandler.cpp @@ -43,6 +43,7 @@ using namespace std; #include using namespace boost; +#include "config.h" #include "configcpp.h" using namespace config; From a127f847934d2b72bdb4a0f006c25a14ad73a9bb Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 4 Oct 2018 16:53:14 +0100 Subject: [PATCH 26/32] MCOL-1433 Fix TIME for MAKEDATE/TIMEDIFF Fix saturation behaviour for TIME with MAKEDATE() and TIMEDIFF() --- utils/funcexp/func_makedate.cpp | 7 ++++--- 
utils/funcexp/func_timediff.cpp | 5 +++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/utils/funcexp/func_makedate.cpp b/utils/funcexp/func_makedate.cpp index 948b612de..5d013728f 100644 --- a/utils/funcexp/func_makedate.cpp +++ b/utils/funcexp/func_makedate.cpp @@ -149,9 +149,10 @@ uint64_t makedate(rowgroup::Row& row, case CalpontSystemCatalog::TIME: { std::ostringstream ss; - Time aTime = parm[1]->data()->getTimeIntVal(row, isNull); - ss << aTime.hour << aTime.minute << aTime.second; - dayofyear = ss.str(); + char buf[9]; + uint64_t aTime = parm[1]->data()->getTimeIntVal(row, isNull); + DataConvert::timeToString1(aTime, buf, 9); + dayofyear = buf; break; } diff --git a/utils/funcexp/func_timediff.cpp b/utils/funcexp/func_timediff.cpp index 742e8faf7..369bb80a1 100644 --- a/utils/funcexp/func_timediff.cpp +++ b/utils/funcexp/func_timediff.cpp @@ -118,6 +118,11 @@ string Func_timediff::getStrVal(rowgroup::Row& row, case execplan::CalpontSystemCatalog::TIME: case execplan::CalpontSystemCatalog::DATETIME: + if (type1 != type2) + { + isNull = true; + break; + } val1 = parm[0]->data()->getDatetimeIntVal(row, isNull); break; From 035c93fe88e43614950c4d211e9e09fb8759eae5 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Fri, 5 Oct 2018 19:45:17 +0300 Subject: [PATCH 27/32] MCOL-1771 Removed extra debug output from release builds. 
--- dbcon/mysql/sm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbcon/mysql/sm.h b/dbcon/mysql/sm.h index 65cf35123..dafa64419 100644 --- a/dbcon/mysql/sm.h +++ b/dbcon/mysql/sm.h @@ -64,7 +64,7 @@ const int CALPONT_INTERNAL_ERROR = -1007; //extern std::ofstream smlog; //#define SMDEBUGLOG smlog //#else -#define SMDEBUGLOG if (true) std::cerr +#define SMDEBUGLOG if (false) std::cout //#endif extern const std::string DEFAULT_SAVE_PATH; From 3fffc75d86c31d8dacd6a54cbbfc44e8b9f31d0a Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 5 Oct 2018 21:48:51 +0100 Subject: [PATCH 28/32] Fix brace merge issue --- primitives/blockcache/filebuffermgr.cpp | 2 ++ procmon/main.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/primitives/blockcache/filebuffermgr.cpp b/primitives/blockcache/filebuffermgr.cpp index 0d3032743..013ef7603 100644 --- a/primitives/blockcache/filebuffermgr.cpp +++ b/primitives/blockcache/filebuffermgr.cpp @@ -219,6 +219,8 @@ void FileBufferMgr::flushManyAllversion(const LBID_t* laVptr, uint32_t cnt) for (it = fbSet.begin(); it != fbSet.end();) { + if (uniquer.find(it->lbid) != uniquer.end()) + { if (fReportFrequency) { fLog << "flushManyAllversion hit: " << it->lbid << " index: " << it->poolIdx << endl; diff --git a/procmon/main.cpp b/procmon/main.cpp index 3e383ab10..4ff8500ce 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -224,7 +224,7 @@ int main(int argc, char** argv) } catch (...) {} - if ( cloud == "amazon-ec2" || cloud == "amazon-vpc" ) { + if ( cloud == "amazon-ec2" || cloud == "amazon-vpc" ) { if (!aMonitor.amazonIPCheck()) { From 6a72b28bab4f5c598671af895f5cc9d0bcdfee00 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Sat, 6 Oct 2018 11:27:51 +0100 Subject: [PATCH 29/32] MCOL-1775 Fix addtime/subtime for WHERE MariaDB server renamed addtime/subtime so we need to use the new names for these functions. 
--- dbcon/mysql/ha_calpont_execplan.cpp | 12 +++++++++--- utils/funcexp/funcexp.cpp | 2 ++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 3b109ade0..a4d3bb91b 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -3665,11 +3665,17 @@ ReturnedColumn* buildFunctionColumn( gwi.no_parm_func_list.push_back(fc); } - // add the sign for addtime function - if (funcName == "add_time") + // func name is addtime/subtime in 10.3.9 + // note: this means get_time() can now go away in our server fork + if ((funcName == "addtime") || (funcName == "subtime")) { + int64_t sign = 1; + if (funcName == "subtime") + { + sign = -1; + } Item_func_add_time* addtime = (Item_func_add_time*)ifp; - sptp.reset(new ParseTree(new ConstantColumn((int64_t)addtime->get_sign()))); + sptp.reset(new ParseTree(new ConstantColumn(sign))); funcParms.push_back(sptp); } diff --git a/utils/funcexp/funcexp.cpp b/utils/funcexp/funcexp.cpp index 66782cc54..53f7da595 100644 --- a/utils/funcexp/funcexp.cpp +++ b/utils/funcexp/funcexp.cpp @@ -75,6 +75,8 @@ FuncExp::FuncExp() fFuncMap["abs"] = new Func_abs(); fFuncMap["acos"] = new Func_acos(); fFuncMap["add_time"] = new Func_add_time(); + fFuncMap["addtime"] = new Func_add_time(); + fFuncMap["subtime"] = new Func_add_time(); fFuncMap["asin"] = new Func_asin(); fFuncMap["ascii"] = new Func_ascii(); fFuncMap["atan"] = new Func_atan(); From 38d0740ec152b441a938dbf0580194a09522977b Mon Sep 17 00:00:00 2001 From: Gagan Goel Date: Sun, 7 Oct 2018 00:34:09 -0400 Subject: [PATCH 30/32] MCOL-266 Add support for BOOLEAN/BOOL data type --- dbcon/ddlpackage/ddl.l | 2 ++ dbcon/ddlpackage/ddl.y | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index 179e5c14b..305adb394 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -184,6 +184,8 @@ TINYTEXT {return TINYTEXT;} 
TEXT {return TEXT;} MEDIUMTEXT {return MEDIUMTEXT;} LONGTEXT {return LONGTEXT;} +BOOL {return BOOL;} +BOOLEAN {return BOOLEAN;} \n { lineno++;} diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index 37ab49425..2b35c8392 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -112,6 +112,7 @@ MIN_ROWS MODIFY NO NOT NULL_TOK NUMBER NUMERIC ON PARTIAL PRECISION PRIMARY REFERENCES RENAME RESTRICT SET SMALLINT TABLE TEXT TINYBLOB TINYTEXT TINYINT TO UNIQUE UNSIGNED UPDATE USER SESSION_USER SYSTEM_USER VARCHAR VARBINARY VARYING WITH ZONE DOUBLE IDB_FLOAT REAL CHARSET IDB_IF EXISTS CHANGE TRUNCATE +BOOL BOOLEAN %token DQ_IDENT IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE TIME @@ -1041,6 +1042,18 @@ exact_numeric_type: $$ = new ColumnType(DDL_UNSIGNED_BIGINT); $$->fLength = DDLDatatypeLength[DDL_BIGINT]; } + | BOOLEAN + { + $$ = new ColumnType(DDL_TINYINT); + $$->fLength = DDLDatatypeLength[DDL_TINYINT]; + $$->fPrecision = 1; + } + | BOOL + { + $$ = new ColumnType(DDL_TINYINT); + $$->fLength = DDLDatatypeLength[DDL_TINYINT]; + $$->fPrecision = 1; + } ; /* Bug 1570, change default scale to 0 from -1 */ opt_precision_scale: From d1f02026c5975cf50277ab9b630fb6dbaa531c6e Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Sun, 7 Oct 2018 16:19:50 +0300 Subject: [PATCH 31/32] Backported the rest of MCOL-1659(spaces in identifiers). 
--- dbcon/ddlpackage/ddl.l | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index aa909374f..430adde8e 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -78,6 +78,7 @@ extended_identifier {ident_start}{extended_ident_cont}* ident_w_spaces {identifier}\x20* identifier_quoted {grave_accent}{extended_identifier}{grave_accent} identifier_double_quoted {double_quote}{extended_identifier}{double_quote} +column_ident_quoted {grave_accent}{ident_w_spaces}+{grave_accent} integer [-+]?{digit}+ decimal ([-+]?({digit}*\.{digit}+)|({digit}+\.{digit}*)) @@ -187,6 +188,8 @@ LONGTEXT {return LONGTEXT;} \n { lineno++;} +{column_ident_quoted} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner, STRIP_QUOTES); return IDENT;} + {whitespace} { /* ignore */ } From 47fbf62bfef480e87a916a04a717a04fb0ff1548 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Mon, 8 Oct 2018 09:20:46 +0100 Subject: [PATCH 32/32] MCOL-1775 Remove warning on unused var --- dbcon/mysql/ha_calpont_execplan.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index a4d3bb91b..b98653af7 100644 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -3674,7 +3674,6 @@ ReturnedColumn* buildFunctionColumn( { sign = -1; } - Item_func_add_time* addtime = (Item_func_add_time*)ifp; sptp.reset(new ParseTree(new ConstantColumn(sign))); funcParms.push_back(sptp); }