diff --git a/dbcon/execplan/calpontsystemcatalog.cpp b/dbcon/execplan/calpontsystemcatalog.cpp index f5f15a2ce..16b98583e 100644 --- a/dbcon/execplan/calpontsystemcatalog.cpp +++ b/dbcon/execplan/calpontsystemcatalog.cpp @@ -6099,7 +6099,8 @@ CalpontSystemCatalog::ColType::ColType() : compressionType(NO_COMPRESSION), columnOID(0), autoincrement(0), - nextvalue(0) + nextvalue(0), + cs(NULL) { charsetNumber = default_charset_info->number; } @@ -6118,10 +6119,16 @@ CalpontSystemCatalog::ColType::ColType(const ColType& rhs) columnOID = rhs.columnOID; autoincrement = rhs.autoincrement; nextvalue = rhs.nextvalue; - charsetNumber = default_charset_info->number; - charsetNumber = default_charset_info->number; + charsetNumber = rhs.charsetNumber; + cs = rhs.cs; } +CHARSET_INFO* CalpontSystemCatalog::ColType::getCharset() +{ + if (!cs) + cs= get_charset(charsetNumber, MYF(MY_WME)); + return cs; +} const string CalpontSystemCatalog::ColType::toString() const { diff --git a/dbcon/execplan/calpontsystemcatalog.h b/dbcon/execplan/calpontsystemcatalog.h index dc9f1fc13..9f6892a5f 100644 --- a/dbcon/execplan/calpontsystemcatalog.h +++ b/dbcon/execplan/calpontsystemcatalog.h @@ -48,6 +48,11 @@ #undef min #undef max +// Because including my_sys.h in a Columnstore header causes too many conflicts +struct charset_info_st; +typedef const struct charset_info_st CHARSET_INFO; + + #ifdef _MSC_VER #define __attribute__(x) #endif @@ -293,9 +298,11 @@ public: bool autoincrement; //set to true if SYSCOLUMN autoincrement is �y� uint64_t nextvalue; //next autoincrement value uint32_t charsetNumber; + const CHARSET_INFO* cs; ColType(const ColType& rhs); + CHARSET_INFO* getCharset(); // for F&E use. 
only serialize necessary info for now void serialize (messageqcpp::ByteStream& b) const { diff --git a/dbcon/execplan/treenode.h b/dbcon/execplan/treenode.h index 6a85d1ee3..5fab31a02 100644 --- a/dbcon/execplan/treenode.h +++ b/dbcon/execplan/treenode.h @@ -336,7 +336,7 @@ public: fRefCount = refCount; } - // the inc and dec functions are used by connector single thread. + // the inc and dec functions are used by connector single thread. virtual void decRefCount() { fRefCount--; @@ -458,6 +458,7 @@ public: void charsetNumber(uint32_t cnum) { fResultType.charsetNumber = cnum; + fOperationType.charsetNumber = cnum; } protected: diff --git a/dbcon/joblist/expressionstep.cpp b/dbcon/joblist/expressionstep.cpp index e5d656936..dab4089d3 100644 --- a/dbcon/joblist/expressionstep.cpp +++ b/dbcon/joblist/expressionstep.cpp @@ -397,8 +397,8 @@ void ExpressionStep::populateColumnInfo(SimpleColumn* sc, JobInfo& jobInfo) //XXX use this before connector sets colType in sc correctly. // type of pseudo column is set by connector - if (dynamic_cast(sc) == NULL) - ct = jobInfo.csc->colType(sc->oid()); +// if (dynamic_cast(sc) == NULL) +// ct = jobInfo.csc->colType(sc->oid()); //X if (ct.scale == 0) // keep passed original ct for decimal type diff --git a/dbcon/joblist/groupconcat.cpp b/dbcon/joblist/groupconcat.cpp index d48c5e289..0c8c355a9 100644 --- a/dbcon/joblist/groupconcat.cpp +++ b/dbcon/joblist/groupconcat.cpp @@ -211,6 +211,7 @@ void GroupConcatInfo::mapColumns(const RowGroup& projRG) vector scale; vector precision; vector types; + vector csNums; pos.push_back(2); vector >::iterator i1 = (*k)->fGroupCols.begin(); @@ -229,6 +230,7 @@ void GroupConcatInfo::mapColumns(const RowGroup& projRG) oids.push_back(projRG.getOIDs()[j->second]); keys.push_back(projRG.getKeys()[j->second]); types.push_back(projRG.getColTypes()[j->second]); + csNums.push_back(projRG.getCharsetNumber(j->second)); scale.push_back(projRG.getScale()[j->second]); 
precision.push_back(projRG.getPrecision()[j->second]); @@ -258,6 +260,7 @@ void GroupConcatInfo::mapColumns(const RowGroup& projRG) oids.push_back(projRG.getOIDs()[j->second]); keys.push_back(projRG.getKeys()[j->second]); types.push_back(projRG.getColTypes()[j->second]); + csNums.push_back(projRG.getCharsetNumber(j->second)); scale.push_back(projRG.getScale()[j->second]); precision.push_back(projRG.getPrecision()[j->second]); } @@ -271,7 +274,7 @@ void GroupConcatInfo::mapColumns(const RowGroup& projRG) i2++; } - (*k)->fRowGroup = RowGroup(oids.size(), pos, oids, keys, types, scale, precision, projRG.getStringTableThreshold(), false); + (*k)->fRowGroup = RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, projRG.getStringTableThreshold(), false); (*k)->fMapping = makeMapping(projRG, (*k)->fRowGroup); } } diff --git a/dbcon/joblist/jlf_common.cpp b/dbcon/joblist/jlf_common.cpp index 576358ea2..fe3195653 100644 --- a/dbcon/joblist/jlf_common.cpp +++ b/dbcon/joblist/jlf_common.cpp @@ -196,7 +196,7 @@ TupleInfo setTupleInfo_(const CalpontSystemCatalog::ColType& ct, { //Haven't even seen the table yet, much less this col ti = TupleInfo(fudgeWidth(ct, col_oid), col_oid, col_key, tbl_key, - ct.scale, ct.precision, ct.colDataType); + ct.scale, ct.precision, ct.colDataType, ct.charsetNumber); jobInfo.keyInfo->tupleInfoMap[col_key] = ti; jobInfo.keyInfo->colKeyToTblKey[col_key] = tbl_key; jobInfo.keyInfo->colKeyToTblKey[tbl_key] = tbl_key; diff --git a/dbcon/joblist/jlf_common.h b/dbcon/joblist/jlf_common.h index 5c5a14b8e..fe6c477d9 100644 --- a/dbcon/joblist/jlf_common.h +++ b/dbcon/joblist/jlf_common.h @@ -74,8 +74,8 @@ const int32_t CNX_EXP_TABLE_ID = 999; struct TupleInfo { TupleInfo(uint32_t w = 0, uint32_t o = 0, uint32_t k = -1, uint32_t t = -1, uint32_t s = 0, uint32_t p = 0, - execplan::CalpontSystemCatalog::ColDataType dt = execplan::CalpontSystemCatalog::BIT) : - width(w), oid(o), key(k), tkey(t), scale(s), precision(p), dtype(dt) { } + 
execplan::CalpontSystemCatalog::ColDataType dt = execplan::CalpontSystemCatalog::BIT, uint32_t csn = 8) : + width(w), oid(o), key(k), tkey(t), scale(s), precision(p), dtype(dt), csNum(csn) { } ~TupleInfo() { } uint32_t width; @@ -85,6 +85,7 @@ struct TupleInfo uint32_t scale; uint32_t precision; execplan::CalpontSystemCatalog::ColDataType dtype; + uint32_t csNum; // For collations }; // for compound join diff --git a/dbcon/joblist/jlf_tuplejoblist.cpp b/dbcon/joblist/jlf_tuplejoblist.cpp index dbcba47ea..8f24193ce 100644 --- a/dbcon/joblist/jlf_tuplejoblist.cpp +++ b/dbcon/joblist/jlf_tuplejoblist.cpp @@ -159,13 +159,14 @@ void tupleKeyToProjectStep(uint32_t key, JobStepVector& jsv, JobInfo& jobInfo) inline void addColumnToRG(uint32_t cid, vector& pos, vector& oids, vector& keys, vector& scale, vector& precision, - vector& types, JobInfo& jobInfo) + vector& types, vector& csNums, JobInfo& jobInfo) { TupleInfo ti(getTupleInfo(cid, jobInfo)); pos.push_back(pos.back() + ti.width); oids.push_back(ti.oid); keys.push_back(ti.key); types.push_back(ti.dtype); + csNums.push_back(ti.csNum); scale.push_back(ti.scale); precision.push_back(ti.precision); } @@ -173,19 +174,20 @@ inline void addColumnToRG(uint32_t cid, vector& pos, vector& inline void addColumnInExpToRG(uint32_t cid, vector& pos, vector& oids, vector& keys, vector& scale, vector& precision, - vector& types, JobInfo& jobInfo) + vector& types, vector& csNums, JobInfo& jobInfo) { if (jobInfo.keyInfo->dictKeyMap.find(cid) != jobInfo.keyInfo->dictKeyMap.end()) cid = jobInfo.keyInfo->dictKeyMap[cid]; if (find(keys.begin(), keys.end(), cid) == keys.end()) - addColumnToRG(cid, pos, oids, keys, scale, precision, types, jobInfo); + addColumnToRG(cid, pos, oids, keys, scale, precision, types, csNums, jobInfo); } inline void addColumnsToRG(uint32_t tid, vector& pos, vector& oids, vector& keys, vector& scale, vector& precision, vector& types, + vector& csNums, TableInfoMap& tableInfoMap, JobInfo& jobInfo) { // -- the selected 
columns @@ -193,7 +195,7 @@ inline void addColumnsToRG(uint32_t tid, vector& pos, vector for (unsigned i = 0; i < pjCol.size(); i++) { - addColumnToRG(pjCol[i], pos, oids, keys, scale, precision, types, jobInfo); + addColumnToRG(pjCol[i], pos, oids, keys, scale, precision, types, csNums, jobInfo); } // -- any columns will be used in cross-table exps @@ -201,7 +203,7 @@ inline void addColumnsToRG(uint32_t tid, vector& pos, vector for (unsigned i = 0; i < exp2.size(); i++) { - addColumnInExpToRG(exp2[i], pos, oids, keys, scale, precision, types, jobInfo); + addColumnInExpToRG(exp2[i], pos, oids, keys, scale, precision, types, csNums, jobInfo); } // -- any columns will be used in returned exps @@ -209,7 +211,7 @@ inline void addColumnsToRG(uint32_t tid, vector& pos, vector for (unsigned i = 0; i < expr.size(); i++) { - addColumnInExpToRG(expr[i], pos, oids, keys, scale, precision, types, jobInfo); + addColumnInExpToRG(expr[i], pos, oids, keys, scale, precision, types, csNums, jobInfo); } // -- any columns will be used in final outer join expression @@ -217,7 +219,7 @@ inline void addColumnsToRG(uint32_t tid, vector& pos, vector for (unsigned i = 0; i < expo.size(); i++) { - addColumnInExpToRG(expo[i], pos, oids, keys, scale, precision, types, jobInfo); + addColumnInExpToRG(expo[i], pos, oids, keys, scale, precision, types, csNums, jobInfo); } } @@ -232,6 +234,7 @@ void constructJoinedRowGroup(RowGroup& rg, uint32_t large, uint32_t prev, bool r vector scale; vector precision; vector types; + vector csNums; pos.push_back(2); // -- start with the join keys @@ -242,14 +245,14 @@ void constructJoinedRowGroup(RowGroup& rg, uint32_t large, uint32_t prev, bool r vector& joinKeys = jobInfo.tableJoinMap[make_pair(large, prev)].fLeftKeys; for (vector::iterator i = joinKeys.begin(); i != joinKeys.end(); i++) - addColumnToRG(*i, pos, oids, keys, scale, precision, types, jobInfo); + addColumnToRG(*i, pos, oids, keys, scale, precision, types, csNums, jobInfo); } // -- followed by 
the columns in select or expression for (set::iterator i = tableSet.begin(); i != tableSet.end(); i++) - addColumnsToRG(*i, pos, oids, keys, scale, precision, types, tableInfoMap, jobInfo); + addColumnsToRG(*i, pos, oids, keys, scale, precision, types, csNums, tableInfoMap, jobInfo); - RowGroup tmpRg(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold); + RowGroup tmpRg(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold); rg = tmpRg; } @@ -264,12 +267,13 @@ void constructJoinedRowGroup(RowGroup& rg, set& tableSet, TableInfoMap vector scale; vector precision; vector types; + vector csNums; pos.push_back(2); for (set::iterator i = tableSet.begin(); i != tableSet.end(); i++) { // columns in select or expression - addColumnsToRG(*i, pos, oids, keys, scale, precision, types, tableInfoMap, jobInfo); + addColumnsToRG(*i, pos, oids, keys, scale, precision, types, csNums, tableInfoMap, jobInfo); // keys to be joined if not already in the rowgroup vector& adjList = tableInfoMap[*i].fAdjacentList; @@ -284,13 +288,13 @@ void constructJoinedRowGroup(RowGroup& rg, set& tableSet, TableInfoMap for (vector::iterator k = joinKeys.begin(); k != joinKeys.end(); k++) { if (find(keys.begin(), keys.end(), *k) == keys.end()) - addColumnToRG(*k, pos, oids, keys, scale, precision, types, jobInfo); + addColumnToRG(*k, pos, oids, keys, scale, precision, types, csNums, jobInfo); } } } } - RowGroup tmpRg(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold); + RowGroup tmpRg(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold); rg = tmpRg; } @@ -339,6 +343,7 @@ void adjustLastStep(JobStepVector& querySteps, DeliveredTableMap& deliverySteps, vector scale; vector precision; vector types; + vector csNums; pos.push_back(2); for (unsigned i = 0; i < v.size(); i++) @@ -347,11 +352,12 @@ void adjustLastStep(JobStepVector& querySteps, DeliveredTableMap& 
deliverySteps, oids.push_back(v[i].oid); keys.push_back(v[i].key); types.push_back(v[i].dtype); + csNums.push_back(v[i].csNum); scale.push_back(v[i].scale); precision.push_back(v[i].precision); } - RowGroup rg1(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold); + RowGroup rg1(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold); // evaluate the returned/groupby expressions if any JobStepVector& expSteps = jobInfo.returnedExpressions; @@ -365,6 +371,7 @@ void adjustLastStep(JobStepVector& querySteps, DeliveredTableMap& deliverySteps, scale.clear(); precision.clear(); types.clear(); + csNums.clear(); pos.push_back(2); const vector& keys0 = rg0->getKeys(); @@ -377,6 +384,7 @@ void adjustLastStep(JobStepVector& querySteps, DeliveredTableMap& deliverySteps, oids.push_back(v[i].oid); keys.push_back(v[i].key); types.push_back(v[i].dtype); + csNums.push_back(v[i].csNum); scale.push_back(v[i].scale); precision.push_back(v[i].precision); } @@ -384,7 +392,7 @@ void adjustLastStep(JobStepVector& querySteps, DeliveredTableMap& deliverySteps, // for v0.9.3.0, the output and input to the expression are in the same row // add the returned column into the rg0 as rg01 - RowGroup rg01 = *rg0 + RowGroup(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold); + RowGroup rg01 = *rg0 + RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold); if (jobInfo.trace) cout << "Output RowGroup 01: " << rg01.toString() << endl; @@ -640,6 +648,7 @@ void addProjectStepsToBps(TableInfoMap::iterator& mit, BatchPrimitive* bps, JobI vector scale; vector precision; vector types; + vector csNums; pos.push_back(2); // this psv is a copy of the project steps, the original vector in mit is not changed @@ -730,6 +739,7 @@ void addProjectStepsToBps(TableInfoMap::iterator& mit, BatchPrimitive* bps, JobI oids.push_back(ti.oid); keys.push_back(ti.key); 
types.push_back(ti.dtype); + csNums.push_back(ti.csNum); scale.push_back(ti.scale); precision.push_back(ti.precision); } @@ -742,12 +752,13 @@ void addProjectStepsToBps(TableInfoMap::iterator& mit, BatchPrimitive* bps, JobI oids.push_back(ti.oid); keys.push_back(ti.key); types.push_back(ti.dtype); + csNums.push_back(ti.csNum); scale.push_back(ti.scale); precision.push_back(ti.precision); } // construct RowGroup - RowGroup rg(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold); + RowGroup rg(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold); // fix the output association AnyDataListSPtr spdl(new AnyDataList()); @@ -818,6 +829,7 @@ void addExpresssionStepsToBps(TableInfoMap::iterator& mit, SJSTEP& sjsp, JobInfo vector scale; vector precision; vector types; + vector csNums; pos.push_back(2); vector cols; @@ -854,12 +866,13 @@ void addExpresssionStepsToBps(TableInfoMap::iterator& mit, SJSTEP& sjsp, JobInfo oids.push_back(ti.oid); keys.push_back(ti.key); types.push_back(ti.dtype); + csNums.push_back(ti.csNum); scale.push_back(ti.scale); precision.push_back(ti.precision); } // construct RowGroup and add to TBPS - RowGroup rg(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold); + RowGroup rg(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold); bps->setFE1Input(rg); if (jobInfo.trace) cout << "FE1 input RowGroup: " << rg.toString() << endl << endl; @@ -1025,6 +1038,7 @@ bool combineJobStepsByTable(TableInfoMap::iterator& mit, JobInfo& jobInfo) vector scale; vector precision; vector types; + vector csNums; pos.push_back(2); pos.push_back(2 + 8); @@ -1033,10 +1047,11 @@ bool combineJobStepsByTable(TableInfoMap::iterator& mit, JobInfo& jobInfo) uint32_t keyId = pds->tupleId(); keys.push_back(keyId); types.push_back(CalpontSystemCatalog::BIGINT); + csNums.push_back(pds->colType().charsetNumber); scale.push_back(0); 
precision.push_back(0); - RowGroup rg(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold); + RowGroup rg(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold); if (jobInfo.trace) cout << "RowGroup pds(and): " << rg.toString() << endl; @@ -1341,6 +1356,7 @@ bool combineJobStepsByTable(TableInfoMap::iterator& mit, JobInfo& jobInfo) vector scale; vector precision; vector types; + vector csNums; pos.push_back(2); for (unsigned i = 0; i < tis.size(); i++) @@ -1349,11 +1365,12 @@ bool combineJobStepsByTable(TableInfoMap::iterator& mit, JobInfo& jobInfo) oids.push_back(tis[i].oid); keys.push_back(tis[i].key); types.push_back(tis[i].dtype); + csNums.push_back(tis[i].csNum); scale.push_back(tis[i].scale); precision.push_back(tis[i].precision); } - RowGroup addRg(oids.size(), pos, oids, keys, types, scale, precision, + RowGroup addRg(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold); RowGroup feRg1 = feRg; @@ -3985,6 +4002,7 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& vector precision; vector width; vector types; + vector csNums; JobStepAssociation jsaToUnion; // bug4388, share code with connector for column type coversion @@ -4009,10 +4027,12 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& const vector& scaleIn = rg.getScale(); const vector& precisionIn = rg.getPrecision(); const vector& typesIn = rg.getColTypes(); - + const vector& csNumsIn = rg.getCharsetNumbers(); + for (uint64_t j = 0; j < colCount; ++j) { queryColTypes[j][i].colDataType = typesIn[j]; + queryColTypes[j][i].charsetNumber = csNumsIn[j]; queryColTypes[j][i].scale = scaleIn[j]; queryColTypes[j][i].precision = precisionIn[j]; queryColTypes[j][i].colWidth = rg.getColumnWidth(j); @@ -4054,6 +4074,7 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& { CalpontSystemCatalog::ColType colType = 
DataConvert::convertUnionColType(queryColTypes[j]); types.push_back(colType.colDataType); + csNums.push_back(colType.charsetNumber); scale.push_back(colType.scale); precision.push_back(colType.precision); width.push_back(colType.colWidth); @@ -4067,7 +4088,7 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo& unionStep->setInputRowGroups(inputRGs); unionStep->setDistinctFlags(distinct); - unionStep->setOutputRowGroup(RowGroup(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold)); + unionStep->setOutputRowGroup(RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold)); // Fix for bug 4388 adjusts the result type at connector side, this workaround is obsolete. // bug 3067, update the returned column types. diff --git a/dbcon/joblist/pdictionaryscan.cpp b/dbcon/joblist/pdictionaryscan.cpp index 8cf910dd2..cf1c9af50 100644 --- a/dbcon/joblist/pdictionaryscan.cpp +++ b/dbcon/joblist/pdictionaryscan.cpp @@ -138,7 +138,7 @@ pDictionaryScan::pDictionaryScan( sendWaiting(false), ridCount(0), ridList(0), - colType(ct), + fColType(ct), pThread(0), cThread(0), fScanLbidReqLimit(jobInfo.rm->getJlScanLbidReqLimit()), @@ -505,8 +505,8 @@ void pDictionaryScan::sendAPrimitiveMessage( hdr.COP2 = fCOP2; hdr.NVALS = fFilterCount; hdr.Count = msgLbidCount; - hdr.CompType = colType.ddn.compressionType; - hdr.charsetNumber = colType.charsetNumber; + hdr.CompType = fColType.ddn.compressionType; + hdr.charsetNumber = fColType.charsetNumber; idbassert(hdr.Count > 0); if (isEquality) diff --git a/dbcon/joblist/primitivestep.h b/dbcon/joblist/primitivestep.h index c6e9134ff..e97136912 100644 --- a/dbcon/joblist/primitivestep.h +++ b/dbcon/joblist/primitivestep.h @@ -960,6 +960,11 @@ public: void appendFilter(const messageqcpp::ByteStream& filter, unsigned count); virtual void abort(); + + const execplan::CalpontSystemCatalog::ColType& colType() const + { + return fColType; + } 
protected: void sendError(uint16_t error); @@ -992,7 +997,7 @@ private: uint32_t fLogicalBlocksPerScan; DataList* ridList; messageqcpp::ByteStream fFilterString; - execplan::CalpontSystemCatalog::ColType colType; + execplan::CalpontSystemCatalog::ColType fColType; uint64_t pThread; //producer thread. thread pool handle uint64_t cThread; //consumer thread. thread pool handle DataList_t* requestList; diff --git a/dbcon/joblist/subquerytransformer.cpp b/dbcon/joblist/subquerytransformer.cpp index b66bcb310..cd5583c26 100644 --- a/dbcon/joblist/subquerytransformer.cpp +++ b/dbcon/joblist/subquerytransformer.cpp @@ -198,6 +198,7 @@ SJSTEP& SubQueryTransformer::makeSubQueryStep(execplan::CalpontSelectExecutionPl vector scale; vector precision; vector types; + vector csNums; pos.push_back(2); CalpontSystemCatalog::OID tblOid = fVtable.tableOid(); @@ -229,6 +230,7 @@ SJSTEP& SubQueryTransformer::makeSubQueryStep(execplan::CalpontSelectExecutionPl { ct.colWidth = row.getColumnWidth(i); ct.colDataType = row.getColTypes()[i]; + ct.charsetNumber = row.getCharsetNumber(i); ct.scale = row.getScale(i); if (colDataTypeInRg != CalpontSystemCatalog::FLOAT && @@ -268,6 +270,7 @@ SJSTEP& SubQueryTransformer::makeSubQueryStep(execplan::CalpontSelectExecutionPl oids.push_back(ti.oid); keys.push_back(ti.key); types.push_back(ti.dtype); + csNums.push_back(ti.csNum); scale.push_back(ti.scale); precision.push_back(ti.precision); } @@ -276,7 +279,7 @@ SJSTEP& SubQueryTransformer::makeSubQueryStep(execplan::CalpontSelectExecutionPl fVtable.columnType(i); } - RowGroup rg1(oids.size(), pos, oids, keys, types, scale, precision, csep->stringTableThreshold()); + RowGroup rg1(oids.size(), pos, oids, keys, types, csNums, scale, precision, csep->stringTableThreshold()); rg1.setUseStringTable(rg.usesStringTable()); dynamic_cast(fSubQueryStep.get())->setOutputRowGroup(rg1); diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index eb4cc52d9..8ba3473c5 100644 --- 
a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -758,13 +758,14 @@ void TupleAggregateStep::configDeliveredRowGroup(const JobInfo& jobInfo) vector::const_iterator offsets0 = fRowGroupOut.getOffsets().begin(); vector::const_iterator types0 = fRowGroupOut.getColTypes().begin(); - + vector csNums = fRowGroupOut.getCharsetNumbers(); vector::const_iterator precision0 = fRowGroupOut.getPrecision().begin(); fRowGroupDelivered = RowGroup(retColCount, vector(offsets0, offsets0 + retColCount + 1), vector(oids.begin(), oids.begin() + retColCount), vector(keys.begin(), keys.begin() + retColCount), vector(types0, types0 + retColCount), + vector(csNums.begin(), csNums.begin() + retColCount), vector(scale.begin(), scale.begin() + retColCount), vector(precision0, precision0 + retColCount), jobInfo.stringTableThreshold); @@ -1037,6 +1038,7 @@ void TupleAggregateStep::prep1PhaseAggregate( const vector& scaleProj = projRG.getScale(); const vector& precisionProj = projRG.getPrecision(); const vector& typeProj = projRG.getColTypes(); + const vector& csNumProj = projRG.getCharsetNumbers(); vector posAgg; vector oidsAgg; @@ -1044,6 +1046,7 @@ void TupleAggregateStep::prep1PhaseAggregate( vector scaleAgg; vector precisionAgg; vector typeAgg; + vector csNumAgg; vector widthAgg; vector groupBy; vector functionVec; @@ -1108,6 +1111,7 @@ void TupleAggregateStep::prep1PhaseAggregate( scaleAgg.push_back(ti.scale); precisionAgg.push_back(ti.precision); typeAgg.push_back(ti.dtype); + csNumAgg.push_back(ti.csNum); widthAgg.push_back(ti.width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( aggOp, stats, 0, outIdx, jobInfo.cntStarPos)); @@ -1126,6 +1130,7 @@ void TupleAggregateStep::prep1PhaseAggregate( scaleAgg.push_back(ti.scale); precisionAgg.push_back(ti.precision); typeAgg.push_back(ti.dtype); + csNumAgg.push_back(ti.csNum); widthAgg.push_back(width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( aggOp, stats, 0, outIdx, -1)); @@ -1174,6 +1179,7 @@ void 
TupleAggregateStep::prep1PhaseAggregate( scaleAgg.push_back(scaleProj[colProj]); precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); + csNumAgg.push_back(csNumProj[colProj]); widthAgg.push_back(width[colProj]); if (groupBy[it->second]->fOutputColumnIndex == (uint32_t) - 1) @@ -1199,6 +1205,7 @@ void TupleAggregateStep::prep1PhaseAggregate( scaleAgg.push_back(ti.scale); precisionAgg.push_back(ti.precision); typeAgg.push_back(ti.dtype); + csNumAgg.push_back(ti.csNum); widthAgg.push_back(ti.width); ++outIdx; continue; @@ -1212,6 +1219,7 @@ void TupleAggregateStep::prep1PhaseAggregate( scaleAgg.push_back(scaleProj[colProj]); precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); + csNumAgg.push_back(csNumProj[colProj]); widthAgg.push_back(width[colProj]); ++outIdx; continue; @@ -1224,6 +1232,7 @@ void TupleAggregateStep::prep1PhaseAggregate( scaleAgg.push_back(scaleProj[colProj]); precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); + csNumAgg.push_back(csNumProj[colProj]); widthAgg.push_back(width[colProj]); ++outIdx; continue; @@ -1291,7 +1300,8 @@ void TupleAggregateStep::prep1PhaseAggregate( scaleAgg.push_back(scaleProj[colProj]); precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); - widthAgg.push_back(width[colProj]); + csNumAgg.push_back(csNumProj[colProj]); + widthAgg.push_back(width[colProj]); } break; @@ -1321,6 +1331,7 @@ void TupleAggregateStep::prep1PhaseAggregate( oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(key); typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAgg.push_back(csNumProj[colProj]); precisionAgg.push_back(-1); widthAgg.push_back(sizeof(long double)); scaleAgg.push_back(0); @@ -1336,6 +1347,7 @@ void TupleAggregateStep::prep1PhaseAggregate( // work around count() in select subquery precisionAgg.push_back(9999); typeAgg.push_back(CalpontSystemCatalog::UBIGINT); + 
csNumAgg.push_back(csNumProj[colProj]); widthAgg.push_back(bigIntWidth); } break; @@ -1365,6 +1377,7 @@ void TupleAggregateStep::prep1PhaseAggregate( scaleAgg.push_back(scaleProj[colProj]); precisionAgg.push_back(0); typeAgg.push_back(CalpontSystemCatalog::DOUBLE); + csNumAgg.push_back(csNumProj[colProj]); widthAgg.push_back(sizeof(double)); } break; @@ -1387,6 +1400,7 @@ void TupleAggregateStep::prep1PhaseAggregate( typeAgg.push_back(CalpontSystemCatalog::BIGINT); } + csNumAgg.push_back(csNumProj[colProj]); widthAgg.push_back(bigIntWidth); } break; @@ -1406,6 +1420,7 @@ void TupleAggregateStep::prep1PhaseAggregate( scaleAgg.push_back(udafFuncCol->fUDAFContext.getScale()); precisionAgg.push_back(udafFuncCol->fUDAFContext.getPrecision()); typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); + csNumAgg.push_back(csNumProj[colProj]); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); break; } @@ -1507,6 +1522,7 @@ void TupleAggregateStep::prep1PhaseAggregate( precisionAgg.push_back(0); precisionAgg.push_back(0); typeAgg.push_back(CalpontSystemCatalog::UBIGINT); + csNumAgg.push_back(8); widthAgg.push_back(bigUintWidth); continue; } @@ -1522,6 +1538,7 @@ void TupleAggregateStep::prep1PhaseAggregate( scaleAgg.push_back(0); precisionAgg.push_back(-1); typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAgg.push_back(8); widthAgg.push_back(sizeof(long double)); ++lastCol; @@ -1531,6 +1548,7 @@ void TupleAggregateStep::prep1PhaseAggregate( scaleAgg.push_back(0); precisionAgg.push_back(-1); typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAgg.push_back(8); widthAgg.push_back(sizeof(long double)); ++lastCol; } @@ -1541,7 +1559,7 @@ void TupleAggregateStep::prep1PhaseAggregate( for (uint64_t i = 0; i < oidsAgg.size(); i++) posAgg.push_back(posAgg[i] + widthAgg[i]); - RowGroup aggRG(oidsAgg.size(), posAgg, oidsAgg, keysAgg, typeAgg, scaleAgg, precisionAgg, + RowGroup aggRG(oidsAgg.size(), posAgg, oidsAgg, keysAgg, typeAgg, csNumAgg, 
scaleAgg, precisionAgg, jobInfo.stringTableThreshold); SP_ROWAGG_UM_t rowAgg(new RowAggregationUM(groupBy, functionVec, jobInfo.rm, jobInfo.umMemLimit)); rowAgg->timeZone(jobInfo.timeZone); @@ -1588,6 +1606,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( const vector& scaleProj = projRG.getScale(); const vector& precisionProj = projRG.getPrecision(); const vector& typeProj = projRG.getColTypes(); + const vector& csNumProj = projRG.getCharsetNumbers(); vector posAgg, posAggDist; vector oidsAgg, oidsAggDist; @@ -1595,6 +1614,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( vector scaleAgg, scaleAggDist; vector precisionAgg, precisionAggDist; vector typeAgg, typeAggDist; + vector csNumAgg, csNumAggDist; vector widthProj, widthAgg, widthAggDist; vector groupBy, groupByNoDist; vector functionVec1, functionVec2, functionNoDistVec; @@ -1662,6 +1682,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAgg.push_back(scaleProj[colProj]); precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); + csNumAgg.push_back(csNumProj[colProj]); widthAgg.push_back(widthProj[colProj]); aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[colAgg], 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAgg)); @@ -1703,6 +1724,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAgg.push_back(scaleProj[colProj]); precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); + csNumAgg.push_back(csNumProj[colProj]); widthAgg.push_back(widthProj[colProj]); aggFuncMap.insert(make_pair(boost::make_tuple(keysAgg[colAgg], 0, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL), colAgg)); @@ -1731,6 +1753,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAgg.push_back(ti.scale); precisionAgg.push_back(ti.precision); typeAgg.push_back(ti.dtype); + csNumAgg.push_back(ti.csNum); widthAgg.push_back(width); SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( aggOp, stats, colAgg, colAgg, -1)); @@ -1824,6 +1847,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAgg.push_back(scaleProj[colProj]); precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); + csNumAgg.push_back(csNumProj[colProj]); widthAgg.push_back(widthProj[colProj]); colAgg++; } @@ -1853,6 +1877,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(aggKey); typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAgg.push_back(8); precisionAgg.push_back(-1); widthAgg.push_back(sizeof(long double)); scaleAgg.push_back(0); @@ -1884,6 +1909,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(CalpontSystemCatalog::BIGINT); } + csNumAgg.push_back(8); widthAgg.push_back(bigIntWidth); colAgg++; } @@ -1924,6 +1950,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAgg.push_back(0); precisionAgg.push_back(-1); typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAgg.push_back(8); widthAgg.push_back(sizeof(long double)); ++colAgg; @@ -1933,6 +1960,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAgg.push_back(0); precisionAgg.push_back(-1); typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAgg.push_back(8); widthAgg.push_back(sizeof(long double)); ++colAgg; } @@ -1956,6 +1984,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAgg.push_back(CalpontSystemCatalog::BIGINT); } + csNumAgg.push_back(8); widthAgg.push_back(bigIntWidth); colAgg++; } @@ -1976,6 +2005,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( 
scaleAgg.push_back(udafFuncCol->fUDAFContext.getScale()); precisionAgg.push_back(udafFuncCol->fUDAFContext.getPrecision()); typeAgg.push_back(udafFuncCol->fUDAFContext.getResultType()); + csNumAgg.push_back(udafFuncCol->fUDAFContext.getCharsetNumber()); widthAgg.push_back(udafFuncCol->fUDAFContext.getColWidth()); ++colAgg; // Column for index of UDAF UserData struct @@ -1984,6 +2014,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAgg.push_back(0); precisionAgg.push_back(0); typeAgg.push_back(CalpontSystemCatalog::UBIGINT); + csNumAgg.push_back(8); widthAgg.push_back(sizeof(uint64_t)); funct->fAuxColumnIndex = colAgg++; // If the first param is const @@ -2004,6 +2035,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAgg.push_back(scaleProj[colProj]); precisionAgg.push_back(precisionProj[colProj]); typeAgg.push_back(typeProj[colProj]); + csNumAgg.push_back(csNumProj[colProj]); widthAgg.push_back(widthProj[colProj]); multiParmIndexes.push_back(colAgg); ++colAgg; @@ -2174,6 +2206,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( oidsAggDist.push_back(oidsAgg[colAgg]); keysAggDist.push_back(retKey); typeAggDist.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggDist.push_back(8); precisionAggDist.push_back(-1); widthAggDist.push_back(sizeof(long double)); scaleAggDist.push_back(0); @@ -2188,6 +2221,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // work around count() in select subquery precisionAggDist.push_back(9999); typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + csNumAggDist.push_back(8); widthAggDist.push_back(bigIntWidth); } break; @@ -2214,6 +2248,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggDist.push_back(scaleAgg[colAgg]); precisionAggDist.push_back(precisionAgg[colAgg]); typeAggDist.push_back(typeAgg[colAgg]); + csNumAggDist.push_back(csNumAgg[colAgg]); uint32_t width = widthAgg[colAgg]; if (aggOp == ROWAGG_GROUP_CONCAT) @@ -2250,6 +2285,7 @@ void 
TupleAggregateStep::prep1PhaseDistinctAggregate( keysAggDist.push_back(retKey); scaleAggDist.push_back(0); typeAggDist.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggDist.push_back(8); precisionAggDist.push_back(-1); widthAggDist.push_back(sizeof(long double)); } @@ -2272,7 +2308,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( typeAggDist.push_back(CalpontSystemCatalog::BIGINT); precisionAggDist.push_back(19); } - + csNumAggDist.push_back(8); widthAggDist.push_back(bigIntWidth); } } @@ -2287,6 +2323,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggDist.push_back(ti.scale); precisionAggDist.push_back(ti.precision); typeAggDist.push_back(ti.dtype); + csNumAggDist.push_back(ti.csNum); widthAggDist.push_back(ti.width); returnColMissing = false; @@ -2299,6 +2336,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggDist.push_back(ti.scale); precisionAggDist.push_back(ti.precision); typeAggDist.push_back(ti.dtype); + csNumAggDist.push_back(ti.csNum); widthAggDist.push_back(ti.width); returnColMissing = false; @@ -2313,6 +2351,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggDist.push_back(ti.scale); precisionAggDist.push_back(ti.precision); typeAggDist.push_back(ti.dtype); + csNumAggDist.push_back(ti.csNum); widthAggDist.push_back(ti.width); returnColMissing = false; @@ -2332,6 +2371,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggDist.push_back(scaleProj[k] >> 8); precisionAggDist.push_back(precisionProj[k]); typeAggDist.push_back(typeProj[k]); + csNumAggDist.push_back(csNumProj[k]); widthAggDist.push_back(widthProj[k]); returnColMissing = false; @@ -2352,6 +2392,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggDist.push_back(scaleProj[k] >> 8); precisionAggDist.push_back(precisionProj[k]); typeAggDist.push_back(typeProj[k]); + csNumAggDist.push_back(csNumProj[k]); widthAggDist.push_back(widthProj[k]); returnColMissing = false; @@ -2476,6 +2517,7 @@ void 
TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggDist.push_back(0); precisionAggDist.push_back(19); typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + csNumAggDist.push_back(8); widthAggDist.push_back(bigIntWidth); } } @@ -2508,6 +2550,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggDist.push_back(0); precisionAggDist.push_back(19); typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + csNumAggDist.push_back(8); widthAggDist.push_back(bigIntWidth); } } @@ -2533,6 +2576,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggDist.push_back(0); precisionAggDist.push_back(0); typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + csNumAggDist.push_back(8); widthAggDist.push_back(sizeof(uint64_t)); continue; } @@ -2548,6 +2592,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggDist.push_back(0); precisionAggDist.push_back(0); typeAggDist.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggDist.push_back(8); widthAggDist.push_back(sizeof(long double)); ++lastCol; @@ -2557,6 +2602,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggDist.push_back(0); precisionAggDist.push_back(-1); typeAggDist.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggDist.push_back(8); widthAggDist.push_back(sizeof(long double)); ++lastCol; } @@ -2568,7 +2614,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (uint64_t i = 0; i < oidsAgg.size(); i++) posAgg.push_back(posAgg[i] + widthAgg[i]); - RowGroup aggRG(oidsAgg.size(), posAgg, oidsAgg, keysAgg, typeAgg, scaleAgg, precisionAgg, + RowGroup aggRG(oidsAgg.size(), posAgg, oidsAgg, keysAgg, typeAgg, csNumAgg, scaleAgg, precisionAgg, jobInfo.stringTableThreshold); SP_ROWAGG_UM_t rowAgg(new RowAggregationUM(groupBy, functionVec1, jobInfo.rm, jobInfo.umMemLimit)); rowAgg->timeZone(jobInfo.timeZone); @@ -2579,7 +2625,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( posAggDist.push_back(posAggDist[i] + widthAggDist[i]); RowGroup 
aggRgDist(oidsAggDist.size(), posAggDist, oidsAggDist, keysAggDist, typeAggDist, - scaleAggDist, precisionAggDist, jobInfo.stringTableThreshold); + csNumAggDist, scaleAggDist, precisionAggDist, jobInfo.stringTableThreshold); SP_ROWAGG_DIST rowAggDist(new RowAggregationDistinct(groupByNoDist, functionVec2, jobInfo.rm, jobInfo.umMemLimit)); rowAggDist->timeZone(jobInfo.timeZone); @@ -2609,6 +2655,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( vector scaleAggGb, scaleAggSub; vector precisionAggGb, precisionAggSub; vector typeAggGb, typeAggSub; + vector csNumAggGb, csNumAggSub; vector widthAggGb, widthAggSub; // populate groupby column info @@ -2619,6 +2666,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggGb.push_back(scaleProj[i]); precisionAggGb.push_back(precisionProj[i]); typeAggGb.push_back(typeProj[i]); + csNumAggGb.push_back(csNumProj[i]); widthAggGb.push_back(widthProj[i]); } @@ -2647,6 +2695,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggSub = scaleAggGb; precisionAggSub = precisionAggGb; typeAggSub = typeAggGb; + csNumAggSub = csNumAggGb; widthAggSub = widthAggGb; oidsAggSub.push_back(oidsProj[j]); @@ -2654,6 +2703,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( scaleAggSub.push_back(scaleProj[j]); precisionAggSub.push_back(precisionProj[j]); typeAggSub.push_back(typeProj[j]); + csNumAggSub.push_back(csNumProj[j]); widthAggSub.push_back(widthProj[j]); // construct sub-rowgroup @@ -2664,7 +2714,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( posAggSub.push_back(posAggSub[k] + widthAggSub[k]); RowGroup subRg(oidsAggSub.size(), posAggSub, oidsAggSub, keysAggSub, typeAggSub, - scaleAggSub, precisionAggSub, jobInfo.stringTableThreshold); + csNumAggSub, scaleAggSub, precisionAggSub, jobInfo.stringTableThreshold); subRgVec.push_back(subRg); // construct groupby vector @@ -2873,6 +2923,7 @@ void TupleAggregateStep::prep2PhasesAggregate( const vector& scaleProj = projRG.getScale(); const vector& 
precisionProj = projRG.getPrecision(); const vector& typeProj = projRG.getColTypes(); + const vector& csNumProj = projRG.getCharsetNumbers(); vector posAggPm, posAggUm; vector oidsAggPm, oidsAggUm; @@ -2880,6 +2931,7 @@ void TupleAggregateStep::prep2PhasesAggregate( vector scaleAggPm, scaleAggUm; vector precisionAggPm, precisionAggUm; vector typeAggPm, typeAggUm; + vector csNumAggPm, csNumAggUm; vector widthAggPm, widthAggUm; vector groupByPm, groupByUm; vector functionVecPm, functionVecUm; @@ -2937,6 +2989,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggPm.push_back(scaleProj[colProj]); precisionAggPm.push_back(precisionProj[colProj]); typeAggPm.push_back(typeProj[colProj]); + csNumAggPm.push_back(csNumProj[colProj]); widthAggPm.push_back(width[colProj]); aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAggPm)); @@ -2977,6 +3030,7 @@ void TupleAggregateStep::prep2PhasesAggregate( keysAggPm.push_back(key); scaleAggPm.push_back(scaleProj[colProj]); typeAggPm.push_back(typeProj[colProj]); + csNumAggPm.push_back(csNumProj[colProj]); widthAggPm.push_back(width[colProj]); precisionAggPm.push_back(precisionProj[colProj]); @@ -3071,6 +3125,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggPm.push_back(scaleProj[colProj]); precisionAggPm.push_back(precisionProj[colProj]); typeAggPm.push_back(typeProj[colProj]); + csNumAggPm.push_back(csNumProj[colProj]); widthAggPm.push_back(width[colProj]); colAggPm++; } @@ -3100,6 +3155,7 @@ void TupleAggregateStep::prep2PhasesAggregate( oidsAggPm.push_back(oidsProj[colProj]); keysAggPm.push_back(aggKey); typeAggPm.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggPm.push_back(8); scaleAggPm.push_back(0); precisionAggPm.push_back(-1); widthAggPm.push_back(sizeof(long double)); @@ -3120,6 +3176,7 @@ void TupleAggregateStep::prep2PhasesAggregate( // work around count() in select subquery precisionAggPm.push_back(9999); 
typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); + csNumAggPm.push_back(8); widthAggPm.push_back(bigIntWidth); colAggPm++; } @@ -3151,6 +3208,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggPm.push_back(scaleProj[colProj]); precisionAggPm.push_back(0); typeAggPm.push_back(CalpontSystemCatalog::DOUBLE); + csNumAggPm.push_back(8); widthAggPm.push_back(sizeof(double)); funct->fAuxColumnIndex = ++colAggPm; @@ -3160,6 +3218,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggPm.push_back(0); precisionAggPm.push_back(-1); typeAggPm.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggPm.push_back(8); widthAggPm.push_back(sizeof(long double)); ++colAggPm; @@ -3169,6 +3228,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggPm.push_back(0); precisionAggPm.push_back(-1); typeAggPm.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggPm.push_back(8); widthAggPm.push_back(sizeof(long double)); ++colAggPm; } @@ -3192,6 +3252,7 @@ void TupleAggregateStep::prep2PhasesAggregate( typeAggPm.push_back(CalpontSystemCatalog::BIGINT); } + csNumAggPm.push_back(8); widthAggPm.push_back(bigIntWidth); colAggPm++; } @@ -3212,6 +3273,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggPm.push_back(udafFuncCol->fUDAFContext.getScale()); precisionAggPm.push_back(udafFuncCol->fUDAFContext.getPrecision()); typeAggPm.push_back(udafFuncCol->fUDAFContext.getResultType()); + csNumAggPm.push_back(udafFuncCol->fUDAFContext.getCharsetNumber()); widthAggPm.push_back(udafFuncCol->fUDAFContext.getColWidth()); ++colAggPm; // Column for index of UDAF UserData struct @@ -3220,6 +3282,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggPm.push_back(0); precisionAggPm.push_back(0); typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); + csNumAggPm.push_back(8); widthAggPm.push_back(bigUintWidth); funct->fAuxColumnIndex = colAggPm++; // If the first param is const @@ -3240,6 +3303,7 @@ void TupleAggregateStep::prep2PhasesAggregate( 
scaleAggPm.push_back(scaleProj[colProj]); precisionAggPm.push_back(precisionProj[colProj]); typeAggPm.push_back(typeProj[colProj]); + csNumAggPm.push_back(csNumProj[colProj]); widthAggPm.push_back(width[colProj]); colAggPm++; // If the param is const @@ -3353,6 +3417,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggUm.push_back(scaleAggPm[colPm]); precisionAggUm.push_back(precisionAggPm[colPm]); typeAggUm.push_back(typeAggPm[colPm]); + csNumAggUm.push_back(csNumAggPm[colPm]); widthAggUm.push_back(widthAggPm[colPm]); } @@ -3379,6 +3444,7 @@ void TupleAggregateStep::prep2PhasesAggregate( keysAggUm.push_back(retKey); scaleAggUm.push_back(0); typeAggUm.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggUm.push_back(8); precisionAggUm.push_back(-1); widthAggUm.push_back(sizeof(long double)); } @@ -3393,6 +3459,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggUm.push_back(0); precisionAggUm.push_back(19); typeAggUm.push_back(CalpontSystemCatalog::UBIGINT); + csNumAggUm.push_back(8); widthAggUm.push_back(bigIntWidth); } } @@ -3407,6 +3474,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggUm.push_back(ti.scale); precisionAggUm.push_back(ti.precision); typeAggUm.push_back(ti.dtype); + csNumAggUm.push_back(ti.csNum); widthAggUm.push_back(ti.width); returnColMissing = false; @@ -3420,6 +3488,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggUm.push_back(ti.scale); precisionAggUm.push_back(ti.precision); typeAggUm.push_back(ti.dtype); + csNumAggUm.push_back(ti.csNum); widthAggUm.push_back(ti.width); returnColMissing = false; @@ -3432,6 +3501,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggUm.push_back(ti.scale); precisionAggUm.push_back(ti.precision); typeAggUm.push_back(ti.dtype); + csNumAggUm.push_back(ti.csNum); widthAggUm.push_back(ti.width); returnColMissing = false; @@ -3560,6 +3630,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggUm.push_back(0); precisionAggUm.push_back(19); 
typeAggUm.push_back(CalpontSystemCatalog::UBIGINT); + csNumAggUm.push_back(8); widthAggUm.push_back(bigIntWidth); } } @@ -3585,6 +3656,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggUm.push_back(0); precisionAggUm.push_back(0); typeAggUm.push_back(CalpontSystemCatalog::UBIGINT); + csNumAggUm.push_back(8); widthAggUm.push_back(bigUintWidth); continue; } @@ -3600,6 +3672,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggUm.push_back(0); precisionAggUm.push_back(-1); typeAggUm.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggUm.push_back(8); widthAggUm.push_back(sizeof(long double)); ++lastCol; @@ -3609,6 +3682,7 @@ void TupleAggregateStep::prep2PhasesAggregate( scaleAggUm.push_back(0); precisionAggUm.push_back(-1); typeAggUm.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggUm.push_back(8); widthAggUm.push_back(sizeof(long double)); ++lastCol; } @@ -3620,8 +3694,8 @@ void TupleAggregateStep::prep2PhasesAggregate( for (uint64_t i = 0; i < oidsAggUm.size(); i++) posAggUm.push_back(posAggUm[i] + widthAggUm[i]); - RowGroup aggRgUm(oidsAggUm.size(), posAggUm, oidsAggUm, keysAggUm, typeAggUm, scaleAggUm, - precisionAggUm, jobInfo.stringTableThreshold); + RowGroup aggRgUm(oidsAggUm.size(), posAggUm, oidsAggUm, keysAggUm, typeAggUm, + csNumAggUm, scaleAggUm, precisionAggUm, jobInfo.stringTableThreshold); SP_ROWAGG_UM_t rowAggUm(new RowAggregationUMP2(groupByUm, functionVecUm, jobInfo.rm, jobInfo.umMemLimit)); rowAggUm->timeZone(jobInfo.timeZone); rowgroups.push_back(aggRgUm); @@ -3632,8 +3706,8 @@ void TupleAggregateStep::prep2PhasesAggregate( for (uint64_t i = 0; i < oidsAggPm.size(); i++) posAggPm.push_back(posAggPm[i] + widthAggPm[i]); - RowGroup aggRgPm(oidsAggPm.size(), posAggPm, oidsAggPm, keysAggPm, typeAggPm, scaleAggPm, - precisionAggPm, jobInfo.stringTableThreshold); + RowGroup aggRgPm(oidsAggPm.size(), posAggPm, oidsAggPm, keysAggPm, typeAggPm, + csNumAggPm, scaleAggPm, precisionAggPm, jobInfo.stringTableThreshold); SP_ROWAGG_PM_t 
rowAggPm(new RowAggregation(groupByPm, functionVecPm)); rowAggPm->timeZone(jobInfo.timeZone); rowgroups.push_back(aggRgPm); @@ -3717,6 +3791,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( const vector& scaleProj = projRG.getScale(); const vector& precisionProj = projRG.getPrecision(); const vector& typeProj = projRG.getColTypes(); + const vector& csNumProj = projRG.getCharsetNumbers(); vector posAggPm, posAggUm, posAggDist; vector oidsAggPm, oidsAggUm, oidsAggDist; @@ -3724,6 +3799,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector scaleAggPm, scaleAggUm, scaleAggDist; vector precisionAggPm, precisionAggUm, precisionAggDist; vector typeAggPm, typeAggUm, typeAggDist; + vector csNumAggPm, csNumAggUm, csNumAggDist; vector widthAggPm, widthAggUm, widthAggDist; vector groupByPm, groupByUm, groupByNoDist; @@ -3785,6 +3861,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggPm.push_back(scaleProj[colProj]); precisionAggPm.push_back(precisionProj[colProj]); typeAggPm.push_back(typeProj[colProj]); + csNumAggPm.push_back(csNumProj[colProj]); widthAggPm.push_back(width[colProj]); aggFuncMap.insert(make_pair(boost::make_tuple(keysAggPm[colAggPm], 0, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL), colAggPm)); @@ -3826,6 +3903,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggPm.push_back(scaleProj[colProj]); precisionAggPm.push_back(precisionProj[colProj]); typeAggPm.push_back(typeProj[colProj]); + csNumAggPm.push_back(csNumProj[colProj]); widthAggPm.push_back(width[colProj]); precisionAggPm.push_back(precisionProj[colProj]); @@ -3927,6 +4005,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggPm.push_back(scaleProj[colProj]); precisionAggPm.push_back(precisionProj[colProj]); typeAggPm.push_back(typeProj[colProj]); + csNumAggPm.push_back(csNumProj[colProj]); widthAggPm.push_back(width[colProj]); colAggPm++; } @@ -3956,6 +4035,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( oidsAggPm.push_back(oidsProj[colProj]); keysAggPm.push_back(aggKey); typeAggPm.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggPm.push_back(8); precisionAggPm.push_back(-1); widthAggPm.push_back(sizeof(long double)); scaleAggPm.push_back(0); @@ -3987,6 +4067,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(CalpontSystemCatalog::BIGINT); } + csNumAggPm.push_back(8); widthAggPm.push_back(bigIntWidth); colAggPm++; } @@ -4018,6 +4099,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggPm.push_back(scaleProj[colProj]); precisionAggPm.push_back(0); typeAggPm.push_back(CalpontSystemCatalog::DOUBLE); + csNumAggPm.push_back(8); widthAggPm.push_back(sizeof(double)); funct->fAuxColumnIndex = ++colAggPm; @@ -4027,6 +4109,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggPm.push_back(0); precisionAggPm.push_back(-1); typeAggPm.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggPm.push_back(8); widthAggPm.push_back(sizeof(long double)); ++colAggPm; @@ -4036,6 +4119,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggPm.push_back(0); precisionAggPm.push_back(-1); typeAggPm.push_back(CalpontSystemCatalog::LONGDOUBLE); + 
csNumAggPm.push_back(8); widthAggPm.push_back(sizeof(long double)); ++colAggPm; } @@ -4059,6 +4143,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( typeAggPm.push_back(CalpontSystemCatalog::BIGINT); } + csNumAggPm.push_back(8); widthAggPm.push_back(bigIntWidth); ++colAggPm; } @@ -4079,6 +4164,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggPm.push_back(udafFuncCol->fUDAFContext.getScale()); precisionAggPm.push_back(udafFuncCol->fUDAFContext.getPrecision()); typeAggPm.push_back(udafFuncCol->fUDAFContext.getResultType()); + csNumAggPm.push_back(udafFuncCol->fUDAFContext.getCharsetNumber()); widthAggPm.push_back(udafFuncCol->fUDAFContext.getColWidth()); ++colAggPm; // Column for index of UDAF UserData struct @@ -4087,6 +4173,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggPm.push_back(0); precisionAggPm.push_back(0); typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); + csNumAggPm.push_back(8); widthAggPm.push_back(sizeof(uint64_t)); funct->fAuxColumnIndex = colAggPm++; // If the first param is const @@ -4107,6 +4194,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggPm.push_back(scaleProj[colProj]); precisionAggPm.push_back(precisionProj[colProj]); typeAggPm.push_back(typeProj[colProj]); + csNumAggPm.push_back(csNumProj[colProj]); widthAggPm.push_back(width[colProj]); multiParmIndexes.push_back(colAggPm); ++colAggPm; @@ -4208,6 +4296,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( precisionAggUm.push_back(precisionAggPm[idx]); widthAggUm.push_back(widthAggPm[idx]); typeAggUm.push_back(typeAggPm[idx]); + csNumAggUm.push_back(csNumAggPm[idx]); } } @@ -4325,6 +4414,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( oidsAggDist.push_back(oidsAggUm[colUm]); keysAggDist.push_back(retKey); typeAggDist.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggDist.push_back(8); precisionAggDist.push_back(-1); widthAggDist.push_back(sizeof(long double)); scaleAggDist.push_back(0); @@ -4342,6 
+4432,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // work around count() in select subquery precisionAggDist.push_back(9999); typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + csNumAggDist.push_back(8); widthAggDist.push_back(bigIntWidth); } break; @@ -4365,6 +4456,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggDist.push_back(scaleAggUm[colUm]); precisionAggDist.push_back(precisionAggUm[colUm]); typeAggDist.push_back(typeAggUm[colUm]); + csNumAggDist.push_back(csNumAggUm[colUm]); widthAggDist.push_back(widthAggUm[colUm]); } @@ -4391,6 +4483,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( keysAggDist.push_back(retKey); scaleAggDist.push_back(0); typeAggDist.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggDist.push_back(8); precisionAggDist.push_back(-1); widthAggDist.push_back(sizeof(long double)); } @@ -4412,6 +4505,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( precisionAggDist.push_back(19); typeAggDist.push_back(CalpontSystemCatalog::BIGINT); } + csNumAggDist.push_back(8); widthAggDist.push_back(bigIntWidth); } } @@ -4426,6 +4520,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggDist.push_back(ti.scale); precisionAggDist.push_back(ti.precision); typeAggDist.push_back(ti.dtype); + csNumAggDist.push_back(ti.csNum); widthAggDist.push_back(ti.width); returnColMissing = false; @@ -4439,6 +4534,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggDist.push_back(ti.scale); precisionAggDist.push_back(ti.precision); typeAggDist.push_back(ti.dtype); + csNumAggDist.push_back(ti.csNum); widthAggDist.push_back(ti.width); returnColMissing = false; @@ -4451,6 +4547,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggDist.push_back(ti.scale); precisionAggDist.push_back(ti.precision); typeAggDist.push_back(ti.dtype); + csNumAggDist.push_back(ti.csNum); widthAggDist.push_back(ti.width); returnColMissing = false; @@ -4570,6 +4667,7 @@ void 
TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggDist.push_back(0); precisionAggDist.push_back(19); typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + csNumAggDist.push_back(8); widthAggDist.push_back(bigIntWidth); } } @@ -4602,6 +4700,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggDist.push_back(0); precisionAggDist.push_back(19); typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + csNumAggDist.push_back(8); widthAggDist.push_back(bigIntWidth); } } @@ -4627,6 +4726,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggDist.push_back(0); precisionAggDist.push_back(0); typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + csNumAggDist.push_back(8); widthAggDist.push_back(sizeof(uint64_t)); continue; } @@ -4642,6 +4742,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggDist.push_back(0); precisionAggDist.push_back(-1); typeAggDist.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggDist.push_back(8); widthAggDist.push_back(sizeof(long double)); ++lastCol; @@ -4651,6 +4752,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggDist.push_back(0); precisionAggDist.push_back(-1); typeAggDist.push_back(CalpontSystemCatalog::LONGDOUBLE); + csNumAggDist.push_back(8); widthAggDist.push_back(sizeof(long double)); ++lastCol; } @@ -4663,8 +4765,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint64_t i = 0; i < oidsAggUm.size(); i++) posAggUm.push_back(posAggUm[i] + widthAggUm[i]); - RowGroup aggRgUm(oidsAggUm.size(), posAggUm, oidsAggUm, keysAggUm, typeAggUm, scaleAggUm, - precisionAggUm, jobInfo.stringTableThreshold); + RowGroup aggRgUm(oidsAggUm.size(), posAggUm, oidsAggUm, keysAggUm, typeAggUm, + csNumAggUm, scaleAggUm, precisionAggUm, jobInfo.stringTableThreshold); SP_ROWAGG_UM_t rowAggUm(new RowAggregationUMP2(groupByUm, functionNoDistVec, jobInfo.rm, jobInfo.umMemLimit)); rowAggUm->timeZone(jobInfo.timeZone); @@ -4673,8 +4775,9 @@ void 
TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint64_t i = 0; i < oidsAggDist.size(); i++) posAggDist.push_back(posAggDist[i] + widthAggDist[i]); - RowGroup aggRgDist(oidsAggDist.size(), posAggDist, oidsAggDist, keysAggDist, typeAggDist, - scaleAggDist, precisionAggDist, jobInfo.stringTableThreshold); + RowGroup aggRgDist(oidsAggDist.size(), posAggDist, oidsAggDist, keysAggDist, + typeAggDist, csNumAggDist, scaleAggDist, + precisionAggDist, jobInfo.stringTableThreshold); SP_ROWAGG_DIST rowAggDist(new RowAggregationDistinct(groupByNoDist, functionVecUm, jobInfo.rm, jobInfo.umMemLimit)); rowAggDist->timeZone(jobInfo.timeZone); @@ -4695,6 +4798,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( vector scaleAggGb, scaleAggSub; vector precisionAggGb, precisionAggSub; vector typeAggGb, typeAggSub; + vector csNumAggGb, csNumAggSub; vector widthAggGb, widthAggSub; // populate groupby column info @@ -4705,6 +4809,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggGb.push_back(scaleAggUm[i]); precisionAggGb.push_back(precisionAggUm[i]); typeAggGb.push_back(typeAggUm[i]); + csNumAggGb.push_back(csNumAggUm[i]); widthAggGb.push_back(widthAggUm[i]); } @@ -4733,6 +4838,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggSub = scaleAggGb; precisionAggSub = precisionAggGb; typeAggSub = typeAggGb; + csNumAggSub = csNumAggGb; widthAggSub = widthAggGb; oidsAggSub.push_back(oidsAggUm[j]); @@ -4740,6 +4846,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggSub.push_back(scaleAggUm[j]); precisionAggSub.push_back(precisionAggUm[j]); typeAggSub.push_back(typeAggUm[j]); + csNumAggSub.push_back(csNumAggUm[j]); widthAggSub.push_back(widthAggUm[j]); // construct sub-rowgroup @@ -4750,7 +4857,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( posAggSub.push_back(posAggSub[k] + widthAggSub[k]); RowGroup subRg(oidsAggSub.size(), posAggSub, oidsAggSub, keysAggSub, typeAggSub, - scaleAggSub, precisionAggSub,
jobInfo.stringTableThreshold); + csNumAggSub, scaleAggSub, precisionAggSub, jobInfo.stringTableThreshold); subRgVec.push_back(subRg); // construct groupby vector @@ -4908,8 +5015,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint64_t i = 0; i < oidsAggPm.size(); i++) posAggPm.push_back(posAggPm[i] + widthAggPm[i]); - RowGroup aggRgPm(oidsAggPm.size(), posAggPm, oidsAggPm, keysAggPm, typeAggPm, scaleAggPm, - precisionAggPm, jobInfo.stringTableThreshold); + RowGroup aggRgPm(oidsAggPm.size(), posAggPm, oidsAggPm, keysAggPm, typeAggPm, + csNumAggPm, scaleAggPm, precisionAggPm, jobInfo.stringTableThreshold); SP_ROWAGG_PM_t rowAggPm(new RowAggregation(groupByPm, functionVecPm)); rowAggPm->timeZone(jobInfo.timeZone); rowgroups.push_back(aggRgPm); diff --git a/dbcon/joblist/tupleannexstep.cpp b/dbcon/joblist/tupleannexstep.cpp index 3f9e8a7b4..841817fc5 100644 --- a/dbcon/joblist/tupleannexstep.cpp +++ b/dbcon/joblist/tupleannexstep.cpp @@ -201,6 +201,7 @@ void TupleAnnexStep::initialize(const RowGroup& rgIn, const JobInfo& jobInfo) vector scale, scaleIn = rgIn.getScale(); vector precision, precisionIn = rgIn.getPrecision(); vector types, typesIn = rgIn.getColTypes(); + vector csNums, csNumsIn = rgIn.getCharsetNumbers(); vector pos, posIn = rgIn.getOffsets(); size_t n = jobInfo.nonConstDelCols.size(); @@ -210,9 +211,10 @@ void TupleAnnexStep::initialize(const RowGroup& rgIn, const JobInfo& jobInfo) scale.insert(scale.end(), scaleIn.begin(), scaleIn.begin() + n); precision.insert(precision.end(), precisionIn.begin(), precisionIn.begin() + n); types.insert(types.end(), typesIn.begin(), typesIn.begin() + n); + csNums.insert(csNums.end(), csNumsIn.begin(), csNumsIn.begin() + n); pos.insert(pos.end(), posIn.begin(), posIn.begin() + n + 1); - fRowGroupOut = RowGroup(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold); + fRowGroupOut = RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, 
jobInfo.stringTableThreshold); } else { diff --git a/dbcon/joblist/tupleconstantstep.cpp b/dbcon/joblist/tupleconstantstep.cpp index 3e2b61e39..6ec49884e 100644 --- a/dbcon/joblist/tupleconstantstep.cpp +++ b/dbcon/joblist/tupleconstantstep.cpp @@ -108,6 +108,7 @@ void TupleConstantStep::initialize(const JobInfo& jobInfo, const RowGroup* rgIn) vector scale, scaleIn = fRowGroupIn.getScale(); vector precision, precisionIn = fRowGroupIn.getPrecision(); vector types, typesIn = fRowGroupIn.getColTypes(); + vector csNums, csNumsIn = fRowGroupIn.getCharsetNumbers(); vector pos; pos.push_back(2); @@ -120,6 +121,7 @@ void TupleConstantStep::initialize(const JobInfo& jobInfo, const RowGroup* rgIn) scaleIn = fRowGroupIn.getScale(); precisionIn = fRowGroupIn.getPrecision(); typesIn = fRowGroupIn.getColTypes(); + csNumsIn = fRowGroupIn.getCharsetNumbers(); } for (uint64_t i = 0, j = 0; i < jobInfo.deliveredCols.size(); i++) @@ -145,6 +147,7 @@ void TupleConstantStep::initialize(const JobInfo& jobInfo, const RowGroup* rgIn) scale.push_back(ct.scale); precision.push_back(ct.precision); types.push_back(ct.colDataType); + csNums.push_back(ct.charsetNumber); pos.push_back(pos.back() + ct.colWidth); fIndexConst.push_back(i); @@ -164,6 +167,7 @@ void TupleConstantStep::initialize(const JobInfo& jobInfo, const RowGroup* rgIn) scale.push_back(scaleIn[j]); precision.push_back(precisionIn[j]); types.push_back(typesIn[j]); + csNums.push_back(csNumsIn[j]); pos.push_back(pos.back() + fRowGroupIn.getColumnWidth(j)); j++; @@ -171,7 +175,7 @@ void TupleConstantStep::initialize(const JobInfo& jobInfo, const RowGroup* rgIn) } } - fRowGroupOut = RowGroup(oids.size(), pos, oids, keys, types, scale, precision, + fRowGroupOut = RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold); fRowGroupOut.initRow(&fRowOut); fRowGroupOut.initRow(&fRowConst, true); @@ -644,6 +648,7 @@ void TupleConstantOnlyStep::initialize(const JobInfo& jobInfo, const rowgroup::R 
vector scale; vector precision; vector types; + vector csNums; vector pos; pos.push_back(2); @@ -673,12 +678,13 @@ void TupleConstantOnlyStep::initialize(const JobInfo& jobInfo, const rowgroup::R scale.push_back(ct.scale); precision.push_back(ct.precision); types.push_back(ct.colDataType); + csNums.push_back(ct.charsetNumber); pos.push_back(pos.back() + ct.colWidth); fIndexConst.push_back(i); } - fRowGroupOut = RowGroup(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold, false); + fRowGroupOut = RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold, false); fRowGroupOut.initRow(&fRowOut); fRowGroupOut.initRow(&fRowConst, true); diff --git a/dbcon/joblist/tuplehavingstep.cpp b/dbcon/joblist/tuplehavingstep.cpp index 6477e9293..65c62d0a3 100644 --- a/dbcon/joblist/tuplehavingstep.cpp +++ b/dbcon/joblist/tuplehavingstep.cpp @@ -102,6 +102,7 @@ void TupleHavingStep::initialize(const RowGroup& rgIn, const JobInfo& jobInfo) vector scale, scaleIn = fRowGroupIn.getScale(); vector precision, precisionIn = fRowGroupIn.getPrecision(); vector types, typesIn = fRowGroupIn.getColTypes(); + vector csNums, csNumsIn = fRowGroupIn.getCharsetNumbers(); vector pos, posIn = fRowGroupIn.getOffsets(); size_t n = 0; @@ -116,9 +117,10 @@ void TupleHavingStep::initialize(const RowGroup& rgIn, const JobInfo& jobInfo) scale.insert(scale.end(), scaleIn.begin(), scaleIn.begin() + n); precision.insert(precision.end(), precisionIn.begin(), precisionIn.begin() + n); types.insert(types.end(), typesIn.begin(), typesIn.begin() + n); + csNums.insert(csNums.end(), csNumsIn.begin(), csNumsIn.begin() + n); pos.insert(pos.end(), posIn.begin(), posIn.begin() + n + 1); - fRowGroupOut = RowGroup(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold); + fRowGroupOut = RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold); fRowGroupOut.initRow(&fRowOut); 
} diff --git a/dbcon/joblist/windowfunctionstep.cpp b/dbcon/joblist/windowfunctionstep.cpp index ab1acd1fe..6a76579a0 100755 --- a/dbcon/joblist/windowfunctionstep.cpp +++ b/dbcon/joblist/windowfunctionstep.cpp @@ -631,6 +631,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) const vector& oids = rg.getOIDs(); const vector& keys = rg.getKeys(); const vector& types = rg.getColTypes(); + const vector& csNums = rg.getCharsetNumbers(); const vector& scales = rg.getScale(); const vector& precisions = rg.getPrecision(); @@ -869,6 +870,7 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) vector scales1; vector precisions1; vector types1; + vector csNums1; pos1.push_back(2); for (size_t i = 0; i < retColCount; i++) @@ -880,10 +882,11 @@ void WindowFunctionStep::initialize(const RowGroup& rg, JobInfo& jobInfo) scales1.push_back(scales[j]); precisions1.push_back(precisions[j]); types1.push_back(types[j]); + csNums1.push_back(csNums[j]); } fRowGroupDelivered = RowGroup( - retColCount, pos1, oids1, keys1, types1, scales1, precisions1, jobInfo.stringTableThreshold); + retColCount, pos1, oids1, keys1, types1, csNums1, scales1, precisions1, jobInfo.stringTableThreshold); if (jobInfo.trace) cout << "delivered RG: " << fRowGroupDelivered.toString() << endl << endl; diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index e0d46abb6..a8c130d3a 100755 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -2955,6 +2955,7 @@ SimpleColumn* getSmallestColumn(boost::shared_ptr csc, sc->viewName(lower(tan.view)); sc->timeZone(gwi.thd->variables.time_zone->get_name()->ptr()); sc->resultType(csc->colType(oidlist[minWidthColOffset].objnum)); + sc->charsetNumber(3000); return sc; } @@ -3118,7 +3119,7 @@ CalpontSystemCatalog::ColType colType_MysqlToIDB (const Item* item) << item->result_type() << endl ); break; } - + ct.charsetNumber = item->collation.collation->number; return ct; } @@ 
-5013,6 +5014,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) colType.dataType = resultType.colDataType; colType.precision = resultType.precision; colType.scale = resultType.scale; + colType.charsetNumber = resultType.charsetNumber; colTypes[i] = colType; } diff --git a/dbcon/mysql/ha_window_function.cpp b/dbcon/mysql/ha_window_function.cpp index d7b112d0e..93031ad78 100644 --- a/dbcon/mysql/ha_window_function.cpp +++ b/dbcon/mysql/ha_window_function.cpp @@ -405,6 +405,7 @@ ReturnedColumn* buildWindowFunctionColumn(Item* item, gp_walk_info& gwi, bool& n colType.dataType = resultType.colDataType; colType.precision = resultType.precision; colType.scale = resultType.scale; + colType.charsetNumber = resultType.charsetNumber; colTypes[i] = colType; } diff --git a/ddlproc/ddlproc.cpp b/ddlproc/ddlproc.cpp index d9563d786..169c42352 100644 --- a/ddlproc/ddlproc.cpp +++ b/ddlproc/ddlproc.cpp @@ -18,6 +18,10 @@ /****************************************************************************************** ******************************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include #include #include @@ -39,8 +43,8 @@ using namespace oam; #include "distributedenginecomm.h" using namespace joblist; -#include "boost/filesystem/operations.hpp" -#include "boost/filesystem/path.hpp" +//#include "boost/filesystem/operations.hpp" +//#include "boost/filesystem/path.hpp" #include "boost/progress.hpp" #include #include @@ -64,9 +68,6 @@ using namespace execplan; #include "crashtrace.h" #include "installdir.h" - -namespace fs = boost::filesystem; - namespace { DistributedEngineComm* Dec; @@ -98,8 +99,10 @@ void added_a_pm(int) int main(int argc, char* argv[]) { // Set locale language - utf8::idb_setlocale(); - + setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); + // Initialize the charset library + my_init(); // This is unset due to the 
way we start it program_invocation_short_name = const_cast("DDLProc"); diff --git a/dmlproc/dmlproc.cpp b/dmlproc/dmlproc.cpp index 4f67ef3ba..85cf53ccc 100644 --- a/dmlproc/dmlproc.cpp +++ b/dmlproc/dmlproc.cpp @@ -20,13 +20,17 @@ * * ***********************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include #include #include #include #include -#include "boost/filesystem/operations.hpp" -#include "boost/filesystem/path.hpp" +//#include "boost/filesystem/operations.hpp" +//#include "boost/filesystem/path.hpp" #include "boost/progress.hpp" using namespace std; @@ -84,8 +88,6 @@ using namespace joblist; #include "crashtrace.h" #include "installdir.h" -namespace fs = boost::filesystem; - threadpool::ThreadPool DMLServer::fDmlPackagepool(10, 0); namespace @@ -514,7 +516,10 @@ int main(int argc, char* argv[]) BRM::DBRM dbrm; Oam oam; // Set locale language - utf8::idb_setlocale(); + setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); + // Initialize the charset library + my_init(); // This is unset due to the way we start it program_invocation_short_name = const_cast("DMLProc"); diff --git a/exemgr/main.cpp b/exemgr/main.cpp index af37a6244..dec1c15ad 100644 --- a/exemgr/main.cpp +++ b/exemgr/main.cpp @@ -39,13 +39,16 @@ * on the Front-End Processor where it is returned to the DBMS * front-end. 
*/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include #include #include #include - #include +#undef root_name #include #include "calpontselectexecutionplan.h" @@ -1434,7 +1437,10 @@ void cleanTempDir() int main(int argc, char* argv[]) { // Set locale language - utf8::idb_setlocale(); + setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); + // Initialize the charset library + my_init(); // This is unset due to the way we start it program_invocation_short_name = const_cast("ExeMgr"); diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp index c2a13ab0f..bd9cd3144 100644 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ b/oamapps/mcsadmin/mcsadmin.cpp @@ -185,6 +185,7 @@ int main(int argc, char* argv[]) setuid(0); // set effective ID to root; ignore return status #endif setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); Oam oam; char* pcommand = 0; diff --git a/primitives/primproc/primproc.cpp b/primitives/primproc/primproc.cpp index 04f9552c6..d54718277 100644 --- a/primitives/primproc/primproc.cpp +++ b/primitives/primproc/primproc.cpp @@ -21,7 +21,9 @@ * * ***********************************************************************/ - +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include #include #include @@ -316,6 +318,12 @@ int main(int argc, char* argv[]) // This is unset due to the way we start it program_invocation_short_name = const_cast("PrimProc"); + // Set locale language + setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); + // Initialize the charset library + my_init(); + int gDebug = 0; int c; diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 79a806a7a..4e2995f32 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -102,31 +102,8 @@ int main(int argc, char** argv) setuid(0); // set effective ID to root; ignore return status #endif // Set locale language - const char* pLoc = utf8::idb_setlocale(); - try - { - logging::LoggingID lid(17); // 
ProcessManager - logging::MessageLog ml(lid); - logging::Message msg(1); - logging::Message::Args args; - if (pLoc) - { - // Log one line - args.add("Set locale to "); - args.add(pLoc); - msg.format( args ); - } - else - { - args.add("Failed to set locale "); - msg.format( args ); - } - ml.logErrorMessage(msg); - } - catch (...) - { - // Ignoring for time being. - } + setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); idbdatafile::IDBPolicy::configIDBPolicy(); diff --git a/procmon/main.cpp b/procmon/main.cpp index 34b4ad656..c278c3680 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -178,7 +178,8 @@ int main(int argc, char** argv) USER = p; // Set locale language - utf8::idb_setlocale(); + setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); //get tmp log directory tmpLogDir = startup::StartUp::tmpDir(); diff --git a/tools/dbloadxml/colxml.cpp b/tools/dbloadxml/colxml.cpp index 778d095e7..993e6d334 100644 --- a/tools/dbloadxml/colxml.cpp +++ b/tools/dbloadxml/colxml.cpp @@ -50,6 +50,7 @@ int main(int argc, char** argv) std::cerr << " colxml: couldn't set uid " << std::endl; } setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); WriteEngine::Config::initConfigCache(); // load Columnstore.xml config settings //Bug 6137 diff --git a/utils/funcexp/func_case.cpp b/utils/funcexp/func_case.cpp index 1a5f511ab..e69c12d84 100644 --- a/utils/funcexp/func_case.cpp +++ b/utils/funcexp/func_case.cpp @@ -22,6 +22,10 @@ * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include //#define NDEBUG #include @@ -180,20 +184,21 @@ inline uint64_t simple_case_cmp(Row& row, case execplan::CalpontSystemCatalog::VARCHAR: { const string& ev = parm[n]->data()->getStrVal(row, isNull); - if (isNull) break; + CHARSET_INFO* cs = parm[n]->data()->resultType().getCharset(); for (i = 1; i <= whereCount; i++) { //BUG 5362 - if (utf8::idb_strcoll(ev.c_str(), 
parm[i]->data()->getStrVal(row, isNull).c_str()) == 0 && !isNull) + const string& p1 = parm[i]->data()->getStrVal(row, isNull); + if (isNull) + break; + if (cs->strnncoll(ev.c_str(), ev.length(), p1.c_str(), p1.length()) == 0) { foundIt = true; break; } - else - isNull = false; } break; diff --git a/utils/funcexp/func_char_length.cpp b/utils/funcexp/func_char_length.cpp index fbe8f6074..fb965cb3d 100644 --- a/utils/funcexp/func_char_length.cpp +++ b/utils/funcexp/func_char_length.cpp @@ -49,7 +49,7 @@ namespace funcexp CalpontSystemCatalog::ColType Func_char_length::operationType( FunctionParm& fp, CalpontSystemCatalog::ColType& resultType ) { - return resultType; + return fp[0]->data()->resultType(); } int64_t Func_char_length::getIntVal(rowgroup::Row& row, @@ -86,8 +86,7 @@ int64_t Func_char_length::getIntVal(rowgroup::Row& row, return 0; const char* b = tstr.c_str(); const char* e = tstr.c_str() + tstr.length(); - const CHARSET_INFO* cs = get_charset(parm[0]->data()->resultType().charsetNumber, MYF(MY_WME)); - return (int64_t)cs->numchars(b, e); + return (int64_t)parm[0]->data()->resultType().getCharset()->numchars(b, e); } case execplan::CalpontSystemCatalog::DATE: diff --git a/utils/funcexp/func_greatest.cpp b/utils/funcexp/func_greatest.cpp index e064eacb3..ff93fff3b 100644 --- a/utils/funcexp/func_greatest.cpp +++ b/utils/funcexp/func_greatest.cpp @@ -22,6 +22,10 @@ * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include #include #include @@ -148,6 +152,7 @@ std::string Func_greatest::getStrVal(rowgroup::Row& row, execplan::CalpontSystemCatalog::ColType& op_ct) { const string& str = fp[0]->data()->getStrVal(row, isNull); + CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset(); string greatestStr = str; @@ -155,12 +160,10 @@ std::string Func_greatest::getStrVal(rowgroup::Row& row, { const string& str1 = 
fp[i]->data()->getStrVal(row, isNull); - int tmp = utf8::idb_strcoll(greatestStr.c_str(), str1.c_str()); - - if ( tmp < 0 ) - -// if ( greatestStr < str1 ) + if (cs->strnncoll(greatestStr.c_str(), greatestStr.length(), str1.c_str(), str1.length()) < 0) + { greatestStr = str1; + } } return greatestStr; diff --git a/utils/funcexp/func_in.cpp b/utils/funcexp/func_in.cpp index 2de6359e7..f8ff745c5 100644 --- a/utils/funcexp/func_in.cpp +++ b/utils/funcexp/func_in.cpp @@ -22,6 +22,10 @@ * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include #include using namespace std; @@ -52,11 +56,6 @@ inline bool numericEQ(result_t op1, result_t op2) return op1 == op2; } -inline bool strEQ(string op1, string op2) -{ - return utf8::idb_strcoll(op1.c_str(), op2.c_str()) == 0; -} - inline bool getBoolForIn(rowgroup::Row& row, funcexp::FunctionParm& pm, bool& isNull, @@ -273,15 +272,16 @@ inline bool getBoolForIn(rowgroup::Row& row, case execplan::CalpontSystemCatalog::TEXT: { const string& val = pm[0]->data()->getStrVal(row, isNull); - if (isNull) return false; + CHARSET_INFO* cs = pm[0]->data()->resultType().getCharset(); + for (uint32_t i = 1; i < pm.size(); i++) { isNull = false; - - if ( utf8::idb_strcoll(val.c_str(), pm[i]->data()->getStrVal(row, isNull).c_str()) == 0 && !isNull) + const string& str1 = pm[i]->data()->getStrVal(row, isNull); + if (cs->strnncoll(val.c_str(), val.length(), str1.c_str(), str1.length()) == 0 && !isNull) return true; if (isNull && isNotIn) diff --git a/utils/funcexp/func_instr.cpp b/utils/funcexp/func_instr.cpp index 47f25fd27..f335eab71 100644 --- a/utils/funcexp/func_instr.cpp +++ b/utils/funcexp/func_instr.cpp @@ -20,6 +20,10 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include 
+#include #include #include @@ -42,37 +46,50 @@ CalpontSystemCatalog::ColType Func_instr::operationType( FunctionParm& fp, Calpo return ct; } -size_t Func_instr::in_str(const string& str, const string& substr, size_t start) -{ - // convert both inputs to wide character strings - std::wstring wcstr = utf8::utf8_to_wstring(str); - std::wstring wcsubstr = utf8::utf8_to_wstring(substr); - - if ((str.length() && !wcstr.length()) || - (substr.length() && !wcsubstr.length())) - // this means one or both of the strings had conversion errors to wide character - return 0; - - size_t pos = wcstr.find(wcsubstr, start - 1); - return (pos != string::npos ? pos + 1 : 0); -} - int64_t Func_instr::getIntVal(rowgroup::Row& row, FunctionParm& parm, bool& isNull, - CalpontSystemCatalog::ColType&) + CalpontSystemCatalog::ColType& colType) { - uint64_t start = 1; - - if (parm.size() == 3) - start = parm[2]->data()->getIntVal(row, isNull); - - if (isNull || start == 0) + int64_t start = 0; + int64_t start0= 0; + my_match_t match; + + const std::string& str = parm[0]->data()->getStrVal(row, isNull); + if (isNull) + return 0; + const char* s1 = str.c_str(); + uint32_t l1 = (uint32_t)str.length(); + + const std::string& substr =parm[1]->data()->getStrVal(row, isNull); + if (isNull) return 0; - //Bug 5110 : to support utf8 char type, we have to convert and search - return in_str(parm[0]->data()->getStrVal(row, isNull), parm[1]->data()->getStrVal(row, isNull), start); + const char* s2 = substr.c_str(); + uint32_t l2 = (uint32_t)substr.length(); + if (l2 < 1) + return start + 1; + CHARSET_INFO* cs = colType.getCharset(); + + if (parm.size() == 3) + { + start0 = start = parm[2]->data()->getIntVal(row, isNull) - 1; + + if ((start < 0) || (start > l1)) + return 0; + + start = (int64_t)cs->charpos(s1, s1+l1, start); // adjust start for multi-byte + + if (start + l2 > l1) // Substring is longer than str at pos. 
+ return 0; + } + + if (!cs->instr(s1+start, l1-start, + s2, l2, + &match, 1)) + return 0; + return (int64_t)match.mb_len + start0 + 1; } diff --git a/utils/funcexp/func_lcase.cpp b/utils/funcexp/func_lcase.cpp index ae399c986..6c40f1f2f 100644 --- a/utils/funcexp/func_lcase.cpp +++ b/utils/funcexp/func_lcase.cpp @@ -20,6 +20,10 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include +#include #include using namespace std; @@ -56,31 +60,22 @@ CalpontSystemCatalog::ColType Func_lcase::operationType(FunctionParm& fp, Calpon std::string Func_lcase::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, - execplan::CalpontSystemCatalog::ColType&) + execplan::CalpontSystemCatalog::ColType& colType) { -// string str = fp[0]->data()->getStrVal(row, isNull); - -// transform (str.begin(), str.end(), str.begin(), to_lower()); - const string& tstr = fp[0]->data()->getStrVal(row, isNull); if (isNull) return ""; - size_t strwclen = utf8::idb_mbstowcs(0, tstr.c_str(), 0) + 1; - wchar_t* wcbuf = new wchar_t[strwclen]; - strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen); - wstring wstr(wcbuf, strwclen); + CHARSET_INFO* cs = colType.getCharset(); + uint64_t inLen = tstr.length(); + uint64_t bufLen= inLen * cs->casedn_multiply; + char* outBuf = new char[bufLen]; + + uint64_t outLen = cs->casedn(tstr.c_str(), inLen, outBuf, bufLen); - for (uint32_t i = 0; i < strwclen; i++) - wstr[i] = std::towlower(wstr[i]); - - size_t strmblen = utf8::idb_wcstombs(0, wstr.c_str(), 0) + 1; - char* outbuf = new char[strmblen]; - strmblen = utf8::idb_wcstombs(outbuf, wstr.c_str(), strmblen); - std::string ret(outbuf, strmblen); - delete [] outbuf; - delete [] wcbuf; + string ret = string(outBuf, outLen); + delete [] outBuf; return ret; } diff --git a/utils/funcexp/func_least.cpp b/utils/funcexp/func_least.cpp index 5f97ee892..84f7559b2 100644 --- 
a/utils/funcexp/func_least.cpp +++ b/utils/funcexp/func_least.cpp @@ -22,6 +22,10 @@ * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include #include #include @@ -127,17 +131,16 @@ std::string Func_least::getStrVal(rowgroup::Row& row, execplan::CalpontSystemCatalog::ColType& op_ct) { string leastStr = fp[0]->data()->getStrVal(row, isNull); + CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset(); for (uint32_t i = 1; i < fp.size(); i++) { const string& str1 = fp[i]->data()->getStrVal(row, isNull); - int tmp = utf8::idb_strcoll(leastStr.c_str(), str1.c_str()); - - if ( tmp > 0 ) - -// if ( leastStr > str1 ) + if (cs->strnncoll(leastStr.c_str(), leastStr.length(), str1.c_str(), str1.length()) > 0) + { leastStr = str1; + } } return leastStr; diff --git a/utils/funcexp/func_nullif.cpp b/utils/funcexp/func_nullif.cpp index 3af8a7d4b..9b5e8c113 100644 --- a/utils/funcexp/func_nullif.cpp +++ b/utils/funcexp/func_nullif.cpp @@ -22,6 +22,11 @@ * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#undef LONGLONG_MIN +#include + #include #include #include @@ -363,6 +368,7 @@ string Func_nullif::getStrVal(rowgroup::Row& row, CalpontSystemCatalog::ColType& op_ct) { string exp1 = parm[0]->data()->getStrVal(row, isNull); + CHARSET_INFO* cs = parm[0]->data()->resultType().getCharset(); if (isNull) { @@ -395,7 +401,7 @@ string Func_nullif::getStrVal(rowgroup::Row& row, exp2 = exp2 + " 00:00:00"; } - if ( utf8::idb_strcoll(exp1.c_str(), exp2.c_str()) == 0 ) + if (cs->strnncoll(exp1.c_str(), exp1.length(), exp2.c_str(), exp2.length()) == 0) { isNull = true; return ""; diff --git a/utils/funcexp/func_strcmp.cpp b/utils/funcexp/func_strcmp.cpp index a0f7a8930..7c7c950c5 100644 --- a/utils/funcexp/func_strcmp.cpp +++ 
b/utils/funcexp/func_strcmp.cpp @@ -21,6 +21,10 @@ * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include #include #include @@ -39,6 +43,10 @@ using namespace joblist; #include "utils_utf8.h" using namespace funcexp; +// Because including my_sys.h in a Columnstore header causes too many conflicts +struct charset_info_st; +typedef const struct charset_info_st CHARSET_INFO; + class to_lower { public: @@ -64,10 +72,11 @@ int64_t Func_strcmp::getIntVal(rowgroup::Row& row, bool& isNull, execplan::CalpontSystemCatalog::ColType& op_ct) { + CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset(); const string& str = fp[0]->data()->getStrVal(row, isNull); - const string& str1 = fp[1]->data()->getStrVal(row, isNull); - int ret = utf8::idb_strcoll(str.c_str(), str1.c_str()); + + int ret = cs->strnncoll(str.c_str(), str.length(), str1.c_str(), str1.length()); // mysql's strcmp returns only -1, 0, and 1 return (ret < 0 ? -1 : (ret > 0 ? 
1 : 0)); } diff --git a/utils/funcexp/func_ucase.cpp b/utils/funcexp/func_ucase.cpp index f032de880..a19594977 100644 --- a/utils/funcexp/func_ucase.cpp +++ b/utils/funcexp/func_ucase.cpp @@ -20,6 +20,10 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include +#include #include using namespace std; @@ -55,31 +59,22 @@ CalpontSystemCatalog::ColType Func_ucase::operationType(FunctionParm& fp, Calpon std::string Func_ucase::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, - execplan::CalpontSystemCatalog::ColType&) + execplan::CalpontSystemCatalog::ColType& colType) { -// string str = fp[0]->data()->getStrVal(row, isNull); - -// transform (str.begin(), str.end(), str.begin(), to_lower()); - const string& tstr = fp[0]->data()->getStrVal(row, isNull); if (isNull) return ""; - size_t strwclen = utf8::idb_mbstowcs(0, tstr.c_str(), 0) + 1; - wchar_t* wcbuf = new wchar_t[strwclen]; - strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen); - wstring wstr(wcbuf, strwclen); + CHARSET_INFO* cs = colType.getCharset(); + uint64_t inLen = tstr.length(); + uint64_t bufLen= inLen * cs->caseup_multiply; + char* outBuf = new char[bufLen]; + + uint64_t outLen = cs->caseup(tstr.c_str(), inLen, outBuf, bufLen); - for (uint32_t i = 0; i < strwclen; i++) - wstr[i] = std::towupper(wstr[i]); - - size_t strmblen = utf8::idb_wcstombs(0, wstr.c_str(), 0) + 1; - char* outbuf = new char[strmblen]; - strmblen = utf8::idb_wcstombs(outbuf, wstr.c_str(), strmblen); - std::string ret(outbuf, strmblen); - delete [] outbuf; - delete [] wcbuf; + string ret = string(outBuf, outLen); + delete [] outBuf; return ret; } diff --git a/utils/funcexp/functor_int.h b/utils/funcexp/functor_int.h index 3e2f465cb..425e6b485 100644 --- a/utils/funcexp/functor_int.h +++ b/utils/funcexp/functor_int.h @@ -84,8 +84,6 @@ public: execplan::CalpontSystemCatalog::ColType 
operationType(FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& resultType); - size_t in_str(const std::string& str, const std::string& substr, size_t start); - int64_t getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, diff --git a/utils/funcexp/utils_utf8.h b/utils/funcexp/utils_utf8.h deleted file mode 100644 index 273a853c7..000000000 --- a/utils/funcexp/utils_utf8.h +++ /dev/null @@ -1,303 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - * Copyright (C) 2016 MariaDB Corporation. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -// $Id$ - - -#ifndef _UTILS_UTF8_H_ -#define _UTILS_UTF8_H_ - - - -#include -#if defined(_MSC_VER) -#include -#include -#elif defined(__FreeBSD__) -//#include -#else -#include -#endif -#include - -#include -#include "liboamcpp.h" - -/** @file */ - -namespace funcexp -{ -namespace utf8 -{ -extern bool JPcodePoint; // code point ordering (Japanese UTF) flag, used in idb_strcoll - -const int MAX_UTF8_BYTES_PER_CHAR = 4; - -// A global loc object so we don't construct one at every compare -extern std::locale loc; -// Is there a way to construct a global reference to a facet? 
-// const std::collate& coll = std::use_facet >(loc); - -//Infinidb version of strlocale BUG 5362 -//set System Locale "C" by default -//return the system Locale currently set in from Columnstore.xml -inline -std::string idb_setlocale() -{ - // get and set locale language - std::string systemLang("C"); - oam::Oam oam; - static bool loggedMsg = false; - - try - { - oam.getSystemConfig("SystemLang", systemLang); - } - catch (...) - { - systemLang = "C"; - } - - char* pLoc = setlocale(LC_ALL, systemLang.c_str()); - - if (pLoc == NULL) - { - try - { - if (!loggedMsg) - { - //send alarm - alarmmanager::ALARMManager alarmMgr; - std::string alarmItem = "system"; - alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::INVALID_LOCALE, alarmmanager::SET); - - // Log one line - logging::LoggingID lid(17); // ProcessManager -- probably the only one to find this for now - logging::MessageLog ml(lid); - logging::Message msg(1); - logging::Message::Args args; - args.add("Failed to set locale "); - args.add(systemLang.c_str()); - args.add(": Setting to 'C'. Critical alarm generated"); - msg.format( args ); - ml.logErrorMessage(msg); - - loggedMsg = true; - } - systemLang = "C"; - } - catch (...) - { - // Ignoring for time being. - } - } - else - { - try - { - //send alarm - alarmmanager::ALARMManager alarmMgr; - std::string alarmItem = "system"; - alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::INVALID_LOCALE, alarmmanager::CLEAR); - } - catch (...) - { - // Ignoring for time being. - } - - } - - printf ("Locale is : %s\n", systemLang.c_str() ); - - //BUG 2991 - setlocale(LC_NUMERIC, "C"); - - if (systemLang.find("ja_JP") != std::string::npos) - JPcodePoint = true; - - // MCOL-1559 Save off the locale to save runtime cpus - std::locale localloc(systemLang.c_str()); - loc = localloc; - - return systemLang; -} - -// Infinidb version of strcoll. BUG 5362 -// strcoll() comparison while ja_JP.utf8 does not give correct results. -// For correct results strcmp() can be used. 
-inline -int idb_strcoll(const char* str1, const char* str2) -{ - if (JPcodePoint) - return strcmp(str1, str2); - else - return strcoll(str1, str2); -} - -// MCOL-1559 Add a trimmed version of strcoll -// The intent here is to make no copy of the original strings and -// not modify them, so we can't use trim to deal with the spaces. -inline -int idb_strtrimcoll(const std::string& str1, const std::string& str2) -{ - static const std::string whitespaces (" "); - const char* s1 = str1.c_str(); - const char* s2 = str2.c_str(); - - // Set found1 to the last non-whitespace char in str1 - std::size_t found1 = str1.find_last_not_of(whitespaces); - // Set found2 to the first whitespace char in str2 - std::size_t found2 = str2.find_last_not_of(whitespaces); - - // Are both strings empty or all whitespace? - if (found1 == std::string::npos && found2 == std::string::npos) - { - return 0; // they match - } - // If str1 is empty or all spaces - if (found1 == std::string::npos) - { - return -1; - } - // If str2 is empty or all spaces - if (found2 == std::string::npos) - { - return 1; - } - - // found1 and found2 point to the character that is not a space. - // compare wants it to point to one past. - found1 += 1; - found2 += 1; - // If no trimming needs doing, then strcoll is faster - if (found1 == str1.size() && found2 == str2.size()) - { - return idb_strcoll(s1, s2); - } - // Compare the (trimmed) strings - const std::collate& coll = std::use_facet >(loc); - int rtn = coll.compare(s1, s1+found1, s2, s2+found2); - return rtn; -} - -// BUG 5241 -// Infinidb specific mbstowcs(). 
This will handle both windows and unix platforms -// Params dest and max should have enough length to accomodate NULL -inline -size_t idb_mbstowcs(wchar_t* dest, const char* src, size_t max) -{ -#ifdef _MSC_VER - // 4th param (-1) denotes to convert till hit NULL char - // if 6th param max = 0, will return the required buffer size - size_t strwclen = MultiByteToWideChar(CP_UTF8, 0, src, -1, dest, (int)max); - // decrement the count of NULL; will become -1 on failure - return --strwclen; - -#else - return mbstowcs(dest, src, max); -#endif -} - -// BUG 5241 -// Infinidb specific wcstombs(). This will handle both windows and unix platforms -// Params dest and max should have enough length to accomodate NULL -inline -size_t idb_wcstombs(char* dest, const wchar_t* src, size_t max) -{ -#ifdef _MSC_VER - // 4th param (-1) denotes to convert till hit NULL char - //if 6th param max = 0, will return the required buffer size - size_t strmblen = WideCharToMultiByte( CP_UTF8, 0, src, -1, dest, (int)max, NULL, NULL); - // decrement the count of NULL; will become -1 on failure - return --strmblen; -#else - return wcstombs(dest, src, max); -#endif -} - -// convert UTF-8 string to wstring -inline -std::wstring utf8_to_wstring (const std::string& str) -{ - size_t bufsize = str.length() + 1; - - // Convert to wide characters. Do all further work in wide characters - wchar_t* wcbuf = new wchar_t[bufsize]; - // Passing +1 so that windows is happy to see extra position to place NULL - size_t strwclen = idb_mbstowcs(wcbuf, str.c_str(), str.length() + 1); - - // if result is -1 it means bad characters which may happen if locale is wrong. 
- // return an empty string - if ( strwclen == static_cast(-1) ) - strwclen = 0; - - std::wstring ret(wcbuf, strwclen); - - delete [] wcbuf; - return ret; -} - - -// convert wstring to UTF-8 string -inline -std::string wstring_to_utf8 (const std::wstring& str) -{ - char* outbuf = new char[(str.length() * MAX_UTF8_BYTES_PER_CHAR) + 1]; - // Passing +1 so that windows is happy to see extra position to place NULL - size_t strmblen = idb_wcstombs(outbuf, str.c_str(), str.length() * MAX_UTF8_BYTES_PER_CHAR + 1); - - // if result is -1 it means bad characters which may happen if locale is wrong. - // return an empty string - if ( strmblen == static_cast(-1) ) - strmblen = 0; - - std::string ret(outbuf, strmblen); - - delete [] outbuf; - return ret; -} - -inline -uint8_t utf8_truncate_point(const char* input, size_t length) -{ - // Find the beginning of a multibyte char to truncate at and return the - // number of bytes to truncate - if (length < 3) - { - return 0; - } - - const unsigned char* b = (const unsigned char*)(input) + length - 3; - - if (b[2] & 0x80) - { - // First byte in a new multi-byte sequence - if (b[2] & 0x40) return 1; - // 3 byte sequence - else if ((b[1] & 0xe0) == 0xe0) return 2; - // 4 byte sequence - else if ((b[0] & 0xf0) == 0xf0) return 3; - } - - return 0; -} - -} //namespace utf8 -} //namespace funcexp - -#endif diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index faa1dbad1..409469931 100755 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -24,6 +24,10 @@ * is the primary class. 
*/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include #include #include @@ -384,36 +388,16 @@ inline void RowAggregation::updateFloatMinMax(float val1, float val2, int64_t co fRow.setFloatField(val1, col); } - - -#define STRCOLL_ENH__ - void RowAggregation::updateStringMinMax(string val1, string val2, int64_t col, int func) { - if (isNull(fRowGroupOut, fRow, col)) + CHARSET_INFO* cs = fRowGroupIn.getCharset(col); + int tmp = cs->strnncoll(val1.c_str(), val1.length(), val2.c_str(), val2.length()); + + if ((tmp < 0 && func == rowgroup::ROWAGG_MIN) || + (tmp > 0 && func == rowgroup::ROWAGG_MAX)) { fRow.setStringField(val1, col); } - -#ifdef STRCOLL_ENH__ - else - { - int tmp = utf8::idb_strcoll(val1.c_str(), val2.c_str()); - - if ((tmp < 0 && func == rowgroup::ROWAGG_MIN) || - (tmp > 0 && func == rowgroup::ROWAGG_MAX)) - { - fRow.setStringField(val1, col); - } - } - -#else - else if (minMax(val1, val2, func)) - { - fRow.setStringField(val1, col); - } - -#endif } //------------------------------------------------------------------------------ diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index aeb7dbf53..5a51f90af 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -53,6 +53,9 @@ #include "mcsv1_udaf.h" #include "constantcolumn.h" +// Because including my_sys.h in a Columnstore header causes too many conflicts +struct charset_info_st; +typedef const struct charset_info_st CHARSET_INFO; // To do: move code that depends on joblist to a proper subsystem. namespace joblist { @@ -706,7 +709,7 @@ protected: // We need a separate copy for each thread. 
mcsv1sdk::mcsv1Context fRGContext; - + // These are handy for testing the actual type of static_any for UDAF static const static_any::any& charTypeId; static const static_any::any& scharTypeId; diff --git a/utils/rowgroup/rowgroup.cpp b/utils/rowgroup/rowgroup.cpp index 31702fb43..049106102 100644 --- a/utils/rowgroup/rowgroup.cpp +++ b/utils/rowgroup/rowgroup.cpp @@ -505,8 +505,8 @@ Row::Row() : data(NULL), strings(NULL), userDataStore(NULL) { } Row::Row(const Row& r) : columnCount(r.columnCount), baseRid(r.baseRid), oldOffsets(r.oldOffsets), stOffsets(r.stOffsets), - offsets(r.offsets), colWidths(r.colWidths), types(r.types), data(r.data), - scale(r.scale), precision(r.precision), strings(r.strings), + offsets(r.offsets), colWidths(r.colWidths), types(r.types), charsetNumbers(r.charsetNumbers), + data(r.data), scale(r.scale), precision(r.precision), strings(r.strings), useStringTable(r.useStringTable), hasLongStringField(r.hasLongStringField), sTableThreshold(r.sTableThreshold), forceInline(r.forceInline), userDataStore(NULL) { } @@ -522,6 +522,7 @@ Row& Row::operator=(const Row& r) offsets = r.offsets; colWidths = r.colWidths; types = r.types; + charsetNumbers = r.charsetNumbers; data = r.data; scale = r.scale; precision = r.precision; @@ -1006,6 +1007,7 @@ RowGroup::RowGroup(uint32_t colCount, const vector& roids, const vector& tkeys, const vector& colTypes, + const vector& csNumbers, const vector& cscale, const vector& cprecision, uint32_t stringTableThreshold, @@ -1013,7 +1015,7 @@ RowGroup::RowGroup(uint32_t colCount, const vector& forceInlineData ) : columnCount(colCount), data(NULL), oldOffsets(positions), oids(roids), keys(tkeys), - types(colTypes), scale(cscale), precision(cprecision), rgData(NULL), strings(NULL), + types(colTypes), charsetNumbers(csNumbers), scale(cscale), precision(cprecision), rgData(NULL), strings(NULL), sTableThreshold(stringTableThreshold) { uint32_t i; @@ -1047,12 +1049,16 @@ RowGroup::RowGroup(uint32_t colCount, useStringTable = 
(stringTable && hasLongStringField); offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]); + + // Set all the charsets to NULL for jit initialization. + charsets.insert(charsets.begin(), charsetNumbers.size(), NULL); } RowGroup::RowGroup(const RowGroup& r) : columnCount(r.columnCount), data(r.data), oldOffsets(r.oldOffsets), stOffsets(r.stOffsets), colWidths(r.colWidths), - oids(r.oids), keys(r.keys), types(r.types), scale(r.scale), precision(r.precision), + oids(r.oids), keys(r.keys), types(r.types), charsetNumbers(r.charsetNumbers), + charsets(r.charsets), scale(r.scale), precision(r.precision), rgData(r.rgData), strings(r.strings), useStringTable(r.useStringTable), hasLongStringField(r.hasLongStringField), sTableThreshold(r.sTableThreshold), forceInline(r.forceInline) @@ -1076,6 +1082,8 @@ RowGroup& RowGroup::operator=(const RowGroup& r) oids = r.oids; keys = r.keys; types = r.types; + charsetNumbers = r.charsetNumbers; + charsets = r.charsets; data = r.data; scale = r.scale; precision = r.precision; @@ -1120,6 +1128,7 @@ void RowGroup::serialize(ByteStream& bs) const serializeInlineVector(bs, oids); serializeInlineVector(bs, keys); serializeInlineVector(bs, types); + serializeInlineVector(bs, charsetNumbers); serializeInlineVector(bs, scale); serializeInlineVector(bs, precision); bs << (uint8_t) useStringTable; @@ -1139,6 +1148,7 @@ void RowGroup::deserialize(ByteStream& bs) deserializeInlineVector(bs, oids); deserializeInlineVector(bs, keys); deserializeInlineVector(bs, types); + deserializeInlineVector(bs, charsetNumbers); deserializeInlineVector(bs, scale); deserializeInlineVector(bs, precision); bs >> tmp8; @@ -1156,6 +1166,10 @@ void RowGroup::deserialize(ByteStream& bs) offsets = &stOffsets[0]; else if (!useStringTable && !oldOffsets.empty()) offsets = &oldOffsets[0]; + + // Set all the charsets to NULL for jit initialization. 
+ charsets.assign(charsetNumbers.size(), NULL); /* reset rather than prepend: leaves exactly one NULL slot per column even if charsets was already populated */ + } void RowGroup::serializeRGData(ByteStream& bs) const @@ -1467,6 +1481,15 @@ void RowGroup::addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList& } } +CHARSET_INFO* RowGroup::getCharset(uint32_t col) /* returns the cached CHARSET_INFO for column col, looking it up via get_charset() on first use; col is not range-checked */ +{ + if (charsets[col] == NULL) + { + charsets[col] = get_charset(charsetNumbers[col], MYF(MY_WME)); + } + return charsets[col]; +} + void RowGroup::setDBRoot(uint32_t dbroot) { *((uint32_t*) &data[dbRootOffset]) = dbroot; diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 504dda86e..2ba350abb 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -58,6 +58,11 @@ #include "../winport/winport.h" +// Because including my_sys.h in a Columnstore header causes too many conflicts +struct charset_info_st; +typedef const struct charset_info_st CHARSET_INFO; + + // Workaround for my_global.h #define of isnan(X) causing a std::std namespace namespace rowgroup @@ -319,6 +324,7 @@ public: inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const; inline execplan::CalpontSystemCatalog::ColDataType* getColTypes(); inline const execplan::CalpontSystemCatalog::ColDataType* getColTypes() const; + inline uint32_t getCharsetNumber(uint32_t colIndex) const; // this returns true if the type is not CHAR or VARCHAR inline bool isCharType(uint32_t colIndex) const; @@ -461,6 +467,7 @@ private: uint32_t* offsets; uint32_t* colWidths; execplan::CalpontSystemCatalog::ColDataType* types; + uint32_t* charsetNumbers; uint8_t* data; uint32_t* scale; uint32_t* precision; @@ -569,6 +576,11 @@ inline const execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes() con return types; } +inline uint32_t Row::getCharsetNumber(uint32_t col) const +{ + return charsetNumbers[col]; +} + inline bool Row::isCharType(uint32_t colIndex) const { return execplan::isCharType(types[colIndex]); @@ -1268,6 +1280,7 @@ public: @param coids An array of oids for each
column. @param tkeys An array of unique id for each column. @param colTypes An array of COLTYPEs for each column. + @param charsetNumbers An array of the lookup numbers for the charset/collation object. @param scale An array specifying the scale of DECIMAL types (0 for non-decimal) @param precision An array specifying the precision of DECIMAL types (0 for non-decimal) */ @@ -1277,6 +1290,7 @@ public: const std::vector& cOids, const std::vector& tkeys, const std::vector& colTypes, + const std::vector& charsetNumbers, const std::vector& scale, const std::vector& precision, uint32_t stringTableThreshold, @@ -1284,7 +1298,7 @@ public: const std::vector& forceInlineData = std::vector() ); - /** @brief The copiers. It copies metadata, not the row data */ + /** @brief The copiers. It copies metadata, not the row data */ RowGroup(const RowGroup&); /** @brief Assignment operator. It copies metadata, not the row data */ @@ -1338,6 +1352,8 @@ public: inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const; inline const std::vector& getColTypes() const; inline std::vector& getColTypes(); + inline const std::vector& getCharsetNumbers() const; + inline uint32_t getCharsetNumber(uint32_t colIndex) const; inline boost::shared_array& getForceInline(); static inline uint32_t getHeaderSize() { @@ -1397,6 +1413,8 @@ public: uint16_t* blockNum); inline void setStringStore(boost::shared_ptr); + + CHARSET_INFO* getCharset(uint32_t col); private: uint32_t columnCount; @@ -1413,8 +1431,11 @@ private: // Used to map the projected column and rowgroup index std::vector keys; std::vector types; - - // DECIMAL support. For non-decimal fields, the values are 0. + // For string collation + std::vector charsetNumbers; + std::vector charsets; + + // DECIMAL support. For non-decimal fields, the values are 0.
std::vector scale; std::vector precision; @@ -1547,6 +1568,7 @@ void RowGroup::initRow(Row* r, bool forceInlineData) const { r->colWidths = (uint32_t*) &colWidths[0]; r->types = (execplan::CalpontSystemCatalog::ColDataType*) & (types[0]); + r->charsetNumbers = (uint32_t*) & (charsetNumbers[0]); r->scale = (uint32_t*) & (scale[0]); r->precision = (uint32_t*) & (precision[0]); } @@ -1649,6 +1671,16 @@ inline std::vector& RowGroup::getCo return types; } +inline const std::vector& RowGroup::getCharsetNumbers() const +{ + return charsetNumbers; +} + +inline uint32_t RowGroup::getCharsetNumber(uint32_t colIndex) const +{ + return charsetNumbers[colIndex]; +} + inline const std::vector& RowGroup::getScale() const { return scale; diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h index 3057cc1aa..d7096c052 100755 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -369,6 +369,11 @@ public: EXPORT mcsv1Context& operator=(const mcsv1Context& rhs); EXPORT mcsv1Context& copy(const mcsv1Context& rhs); + + // Character collation support + EXPORT void setCharsetNumber(uint32_t csNum); + EXPORT uint32_t getCharsetNumber(); // Returns the unique ID for the language/collation + EXPORT CHARSET_INFO* getCharset(); private: @@ -392,6 +397,7 @@ private: int32_t fParamCount; std::vector paramKeys; enum_mariadb_return_type mariadbReturnType; + uint32_t fCharsetNumber; public: // For use by the framework @@ -416,6 +422,7 @@ public: EXPORT void setParamCount(int32_t paramCount); std::vector* getParamKeys(); EXPORT void setMariaDBReturnType(enum_mariadb_return_type rt); + }; // Since aggregate functions can operate on any data type, we use the following structure @@ -438,7 +445,9 @@ struct ColumnDatum uint32_t scale; // If dataType is a DECIMAL type uint32_t precision; // If dataType is a DECIMAL type std::string alias; // Only filled in for init() - ColumnDatum() : dataType(execplan::CalpontSystemCatalog::UNDEFINED), scale(0), precision(-1) {}; + uint32_t 
charsetNumber; // For string collations + ColumnDatum() : dataType(execplan::CalpontSystemCatalog::UNDEFINED), + scale(0), precision(-1), charsetNumber(8) {}; }; // Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or @@ -658,7 +667,8 @@ inline mcsv1Context::mcsv1Context() : fStartConstant(0), fEndConstant(0), func(NULL), - fParamCount(0) + fParamCount(0), + fCharsetNumber(8) // Latin1 { } @@ -683,6 +693,7 @@ inline mcsv1Context& mcsv1Context::copy(const mcsv1Context& rhs) bInterrupted = rhs.bInterrupted; // Multiple threads will use the same reference func = rhs.func; fParamCount = rhs.fParamCount; + fCharsetNumber = rhs.fCharsetNumber; return *this; } @@ -979,6 +990,16 @@ inline void mcsv1Context::setMariaDBReturnType(enum_mariadb_return_type rt) mariadbReturnType = rt; } +inline void mcsv1Context::setCharsetNumber(uint32_t csNum) +{ + fCharsetNumber=csNum; +} + +inline uint32_t mcsv1Context::getCharsetNumber() +{ + return fCharsetNumber; +} + inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped) { return NOT_IMPLEMENTED; diff --git a/versioning/BRM/masternode.cpp b/versioning/BRM/masternode.cpp index bd28649db..a358a04ae 100644 --- a/versioning/BRM/masternode.cpp +++ b/versioning/BRM/masternode.cpp @@ -103,7 +103,9 @@ void reload(int num) int main(int argc, char** argv) { // Set locale language - utf8::idb_setlocale(); + setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); + BRM::logInit ( BRM::SubSystemLogId_controllerNode ); diff --git a/versioning/BRM/slavenode.cpp b/versioning/BRM/slavenode.cpp index 5a981634c..d862a1b44 100644 --- a/versioning/BRM/slavenode.cpp +++ b/versioning/BRM/slavenode.cpp @@ -79,7 +79,8 @@ void reset(int sig) int main(int argc, char** argv) { // Set locale language - utf8::idb_setlocale(); + setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); BRM::logInit ( BRM::SubSystemLogId_workerNode ); diff --git a/writeengine/bulk/cpimport.cpp b/writeengine/bulk/cpimport.cpp 
index 6242c40a5..4a878210a 100644 --- a/writeengine/bulk/cpimport.cpp +++ b/writeengine/bulk/cpimport.cpp @@ -1092,7 +1092,17 @@ int main(int argc, char** argv) setupSignalHandlers(); // Set locale language - utf8::idb_setlocale(); + const char* pLoc = setlocale(LC_ALL, ""); + if (pLoc) + { + // Log one line + cout << "Locale = " << pLoc; + } + else + { + cout << "Failed to set locale "; + } + setlocale(LC_NUMERIC, "C"); // Initialize singleton instance of syslogging if (argc > 0) diff --git a/writeengine/server/we_server.cpp b/writeengine/server/we_server.cpp index d0cc3d889..1ac633cdf 100644 --- a/writeengine/server/we_server.cpp +++ b/writeengine/server/we_server.cpp @@ -19,6 +19,9 @@ * $Id: we_server.cpp 4700 2013-07-08 16:43:49Z bpaul $ * *******************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include #include #include @@ -102,7 +105,10 @@ int setupResources() int main(int argc, char** argv) { // Set locale language - utf8::idb_setlocale(); + setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); + // Initialize the charset library + my_init(); // This is unset due to the way we start it program_invocation_short_name = const_cast("WriteEngineServ"); @@ -135,8 +141,7 @@ int main(int argc, char** argv) { } } - //BUG 2991 - setlocale(LC_NUMERIC, "C"); + #ifndef _MSC_VER struct sigaction sa; memset(&sa, 0, sizeof(sa));