1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-01 06:46:55 +03:00

MCOL-3536 collation

This commit is contained in:
David Hall
2020-05-26 12:42:11 -05:00
parent 11ba12f6ea
commit 06e50e0926
47 changed files with 516 additions and 535 deletions

View File

@ -159,13 +159,14 @@ void tupleKeyToProjectStep(uint32_t key, JobStepVector& jsv, JobInfo& jobInfo)
// Appends the row-group metadata of one column to the parallel vectors used to
// build a RowGroup.
//
// The TupleInfo for `cid` is looked up through getTupleInfo(); its oid, key,
// data type, charset number, scale and precision are pushed onto the
// corresponding vectors, and a new offset (previous offset + column width) is
// pushed onto `pos`.
//
// `pos` must be non-empty on entry because pos.back() is read; the callers
// visible in this file seed it with pos.push_back(2) before adding columns.
inline void addColumnToRG(uint32_t cid, vector<uint32_t>& pos, vector<uint32_t>& oids,
vector<uint32_t>& keys, vector<uint32_t>& scale, vector<uint32_t>& precision,
vector<CalpontSystemCatalog::ColDataType>& types, JobInfo& jobInfo)
vector<CalpontSystemCatalog::ColDataType>& types, vector<uint32_t>& csNums, JobInfo& jobInfo)
{
TupleInfo ti(getTupleInfo(cid, jobInfo));
// next offset = last offset + width of this column
pos.push_back(pos.back() + ti.width);
oids.push_back(ti.oid);
keys.push_back(ti.key);
types.push_back(ti.dtype);
// charset number of the column, kept index-aligned with the other vectors
csNums.push_back(ti.csNum);
scale.push_back(ti.scale);
precision.push_back(ti.precision);
}
@ -173,19 +174,20 @@ inline void addColumnToRG(uint32_t cid, vector<uint32_t>& pos, vector<uint32_t>&
// Adds a column referenced by an expression to the row-group vectors, unless
// its key is already present in `keys`.
//
// If `cid` has an entry in jobInfo.keyInfo->dictKeyMap, the mapped key replaces
// `cid` before the duplicate check and before the column is added.
inline void addColumnInExpToRG(uint32_t cid, vector<uint32_t>& pos, vector<uint32_t>& oids,
vector<uint32_t>& keys, vector<uint32_t>& scale, vector<uint32_t>& precision,
vector<CalpontSystemCatalog::ColDataType>& types, JobInfo& jobInfo)
vector<CalpontSystemCatalog::ColDataType>& types, vector<uint32_t>& csNums, JobInfo& jobInfo)
{
// substitute the key recorded in dictKeyMap, when there is one
if (jobInfo.keyInfo->dictKeyMap.find(cid) != jobInfo.keyInfo->dictKeyMap.end())
cid = jobInfo.keyInfo->dictKeyMap[cid];
// linear search: only add the column if its key is not in the row group yet
if (find(keys.begin(), keys.end(), cid) == keys.end())
addColumnToRG(cid, pos, oids, keys, scale, precision, types, jobInfo);
addColumnToRG(cid, pos, oids, keys, scale, precision, types, csNums, jobInfo);
}
inline void addColumnsToRG(uint32_t tid, vector<uint32_t>& pos, vector<uint32_t>& oids,
vector<uint32_t>& keys, vector<uint32_t>& scale, vector<uint32_t>& precision,
vector<CalpontSystemCatalog::ColDataType>& types,
vector<uint32_t>& csNums,
TableInfoMap& tableInfoMap, JobInfo& jobInfo)
{
// -- the selected columns
@ -193,7 +195,7 @@ inline void addColumnsToRG(uint32_t tid, vector<uint32_t>& pos, vector<uint32_t>
for (unsigned i = 0; i < pjCol.size(); i++)
{
addColumnToRG(pjCol[i], pos, oids, keys, scale, precision, types, jobInfo);
addColumnToRG(pjCol[i], pos, oids, keys, scale, precision, types, csNums, jobInfo);
}
// -- any columns will be used in cross-table exps
@ -201,7 +203,7 @@ inline void addColumnsToRG(uint32_t tid, vector<uint32_t>& pos, vector<uint32_t>
for (unsigned i = 0; i < exp2.size(); i++)
{
addColumnInExpToRG(exp2[i], pos, oids, keys, scale, precision, types, jobInfo);
addColumnInExpToRG(exp2[i], pos, oids, keys, scale, precision, types, csNums, jobInfo);
}
// -- any columns will be used in returned exps
@ -209,7 +211,7 @@ inline void addColumnsToRG(uint32_t tid, vector<uint32_t>& pos, vector<uint32_t>
for (unsigned i = 0; i < expr.size(); i++)
{
addColumnInExpToRG(expr[i], pos, oids, keys, scale, precision, types, jobInfo);
addColumnInExpToRG(expr[i], pos, oids, keys, scale, precision, types, csNums, jobInfo);
}
// -- any columns will be used in final outer join expression
@ -217,7 +219,7 @@ inline void addColumnsToRG(uint32_t tid, vector<uint32_t>& pos, vector<uint32_t>
for (unsigned i = 0; i < expo.size(); i++)
{
addColumnInExpToRG(expo[i], pos, oids, keys, scale, precision, types, jobInfo);
addColumnInExpToRG(expo[i], pos, oids, keys, scale, precision, types, csNums, jobInfo);
}
}
@ -232,6 +234,7 @@ void constructJoinedRowGroup(RowGroup& rg, uint32_t large, uint32_t prev, bool r
vector<uint32_t> scale;
vector<uint32_t> precision;
vector<CalpontSystemCatalog::ColDataType> types;
vector<uint32_t> csNums;
pos.push_back(2);
// -- start with the join keys
@ -242,14 +245,14 @@ void constructJoinedRowGroup(RowGroup& rg, uint32_t large, uint32_t prev, bool r
vector<uint32_t>& joinKeys = jobInfo.tableJoinMap[make_pair(large, prev)].fLeftKeys;
for (vector<uint32_t>::iterator i = joinKeys.begin(); i != joinKeys.end(); i++)
addColumnToRG(*i, pos, oids, keys, scale, precision, types, jobInfo);
addColumnToRG(*i, pos, oids, keys, scale, precision, types, csNums, jobInfo);
}
// -- followed by the columns in select or expression
for (set<uint32_t>::iterator i = tableSet.begin(); i != tableSet.end(); i++)
addColumnsToRG(*i, pos, oids, keys, scale, precision, types, tableInfoMap, jobInfo);
addColumnsToRG(*i, pos, oids, keys, scale, precision, types, csNums, tableInfoMap, jobInfo);
RowGroup tmpRg(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold);
RowGroup tmpRg(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold);
rg = tmpRg;
}
@ -264,12 +267,13 @@ void constructJoinedRowGroup(RowGroup& rg, set<uint32_t>& tableSet, TableInfoMap
vector<uint32_t> scale;
vector<uint32_t> precision;
vector<CalpontSystemCatalog::ColDataType> types;
vector<uint32_t> csNums;
pos.push_back(2);
for (set<uint32_t>::iterator i = tableSet.begin(); i != tableSet.end(); i++)
{
// columns in select or expression
addColumnsToRG(*i, pos, oids, keys, scale, precision, types, tableInfoMap, jobInfo);
addColumnsToRG(*i, pos, oids, keys, scale, precision, types, csNums, tableInfoMap, jobInfo);
// keys to be joined if not already in the rowgroup
vector<uint32_t>& adjList = tableInfoMap[*i].fAdjacentList;
@ -284,13 +288,13 @@ void constructJoinedRowGroup(RowGroup& rg, set<uint32_t>& tableSet, TableInfoMap
for (vector<uint32_t>::iterator k = joinKeys.begin(); k != joinKeys.end(); k++)
{
if (find(keys.begin(), keys.end(), *k) == keys.end())
addColumnToRG(*k, pos, oids, keys, scale, precision, types, jobInfo);
addColumnToRG(*k, pos, oids, keys, scale, precision, types, csNums, jobInfo);
}
}
}
}
RowGroup tmpRg(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold);
RowGroup tmpRg(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold);
rg = tmpRg;
}
@ -339,6 +343,7 @@ void adjustLastStep(JobStepVector& querySteps, DeliveredTableMap& deliverySteps,
vector<uint32_t> scale;
vector<uint32_t> precision;
vector<CalpontSystemCatalog::ColDataType> types;
vector<uint32_t> csNums;
pos.push_back(2);
for (unsigned i = 0; i < v.size(); i++)
@ -347,11 +352,12 @@ void adjustLastStep(JobStepVector& querySteps, DeliveredTableMap& deliverySteps,
oids.push_back(v[i].oid);
keys.push_back(v[i].key);
types.push_back(v[i].dtype);
csNums.push_back(v[i].csNum);
scale.push_back(v[i].scale);
precision.push_back(v[i].precision);
}
RowGroup rg1(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold);
RowGroup rg1(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold);
// evaluate the returned/groupby expressions if any
JobStepVector& expSteps = jobInfo.returnedExpressions;
@ -365,6 +371,7 @@ void adjustLastStep(JobStepVector& querySteps, DeliveredTableMap& deliverySteps,
scale.clear();
precision.clear();
types.clear();
csNums.clear();
pos.push_back(2);
const vector<uint32_t>& keys0 = rg0->getKeys();
@ -377,6 +384,7 @@ void adjustLastStep(JobStepVector& querySteps, DeliveredTableMap& deliverySteps,
oids.push_back(v[i].oid);
keys.push_back(v[i].key);
types.push_back(v[i].dtype);
csNums.push_back(v[i].csNum);
scale.push_back(v[i].scale);
precision.push_back(v[i].precision);
}
@ -384,7 +392,7 @@ void adjustLastStep(JobStepVector& querySteps, DeliveredTableMap& deliverySteps,
// for v0.9.3.0, the output and input to the expression are in the same row
// add the returned column into the rg0 as rg01
RowGroup rg01 = *rg0 + RowGroup(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold);
RowGroup rg01 = *rg0 + RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold);
if (jobInfo.trace) cout << "Output RowGroup 01: " << rg01.toString() << endl;
@ -640,6 +648,7 @@ void addProjectStepsToBps(TableInfoMap::iterator& mit, BatchPrimitive* bps, JobI
vector<uint32_t> scale;
vector<uint32_t> precision;
vector<CalpontSystemCatalog::ColDataType> types;
vector<uint32_t> csNums;
pos.push_back(2);
// this psv is a copy of the project steps, the original vector in mit is not changed
@ -730,6 +739,7 @@ void addProjectStepsToBps(TableInfoMap::iterator& mit, BatchPrimitive* bps, JobI
oids.push_back(ti.oid);
keys.push_back(ti.key);
types.push_back(ti.dtype);
csNums.push_back(ti.csNum);
scale.push_back(ti.scale);
precision.push_back(ti.precision);
}
@ -742,12 +752,13 @@ void addProjectStepsToBps(TableInfoMap::iterator& mit, BatchPrimitive* bps, JobI
oids.push_back(ti.oid);
keys.push_back(ti.key);
types.push_back(ti.dtype);
csNums.push_back(ti.csNum);
scale.push_back(ti.scale);
precision.push_back(ti.precision);
}
// construct RowGroup
RowGroup rg(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold);
RowGroup rg(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold);
// fix the output association
AnyDataListSPtr spdl(new AnyDataList());
@ -818,6 +829,7 @@ void addExpresssionStepsToBps(TableInfoMap::iterator& mit, SJSTEP& sjsp, JobInfo
vector<uint32_t> scale;
vector<uint32_t> precision;
vector<CalpontSystemCatalog::ColDataType> types;
vector<uint32_t> csNums;
pos.push_back(2);
vector<uint32_t> cols;
@ -854,12 +866,13 @@ void addExpresssionStepsToBps(TableInfoMap::iterator& mit, SJSTEP& sjsp, JobInfo
oids.push_back(ti.oid);
keys.push_back(ti.key);
types.push_back(ti.dtype);
csNums.push_back(ti.csNum);
scale.push_back(ti.scale);
precision.push_back(ti.precision);
}
// construct RowGroup and add to TBPS
RowGroup rg(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold);
// csNums is passed directly after types, as in every other RowGroup construction in this change;
// scale, csNums and precision are all vector<uint32_t>, so a swapped order would compile silently.
RowGroup rg(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold);
bps->setFE1Input(rg);
if (jobInfo.trace) cout << "FE1 input RowGroup: " << rg.toString() << endl << endl;
@ -1025,6 +1038,7 @@ bool combineJobStepsByTable(TableInfoMap::iterator& mit, JobInfo& jobInfo)
vector<uint32_t> scale;
vector<uint32_t> precision;
vector<CalpontSystemCatalog::ColDataType> types;
vector<uint32_t> csNums;
pos.push_back(2);
pos.push_back(2 + 8);
@ -1033,10 +1047,11 @@ bool combineJobStepsByTable(TableInfoMap::iterator& mit, JobInfo& jobInfo)
uint32_t keyId = pds->tupleId();
keys.push_back(keyId);
types.push_back(CalpontSystemCatalog::BIGINT);
csNums.push_back(pds->colType().charsetNumber);
scale.push_back(0);
precision.push_back(0);
RowGroup rg(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold);
RowGroup rg(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold);
if (jobInfo.trace) cout << "RowGroup pds(and): " << rg.toString() << endl;
@ -1341,6 +1356,7 @@ bool combineJobStepsByTable(TableInfoMap::iterator& mit, JobInfo& jobInfo)
vector<uint32_t> scale;
vector<uint32_t> precision;
vector<CalpontSystemCatalog::ColDataType> types;
vector<uint32_t> csNums;
pos.push_back(2);
for (unsigned i = 0; i < tis.size(); i++)
@ -1349,11 +1365,12 @@ bool combineJobStepsByTable(TableInfoMap::iterator& mit, JobInfo& jobInfo)
oids.push_back(tis[i].oid);
keys.push_back(tis[i].key);
types.push_back(tis[i].dtype);
csNums.push_back(tis[i].csNum);
scale.push_back(tis[i].scale);
precision.push_back(tis[i].precision);
}
RowGroup addRg(oids.size(), pos, oids, keys, types, scale, precision,
RowGroup addRg(oids.size(), pos, oids, keys, types, csNums, scale, precision,
jobInfo.stringTableThreshold);
RowGroup feRg1 = feRg;
@ -3985,6 +4002,7 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo&
vector<uint32_t> precision;
vector<uint32_t> width;
vector<CalpontSystemCatalog::ColDataType> types;
vector<uint32_t> csNums;
JobStepAssociation jsaToUnion;
// bug4388, share code with connector for column type conversion
@ -4009,10 +4027,12 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo&
const vector<uint32_t>& scaleIn = rg.getScale();
const vector<uint32_t>& precisionIn = rg.getPrecision();
const vector<CalpontSystemCatalog::ColDataType>& typesIn = rg.getColTypes();
const vector<uint32_t>& csNumsIn = rg.getCharsetNumbers();
for (uint64_t j = 0; j < colCount; ++j)
{
queryColTypes[j][i].colDataType = typesIn[j];
queryColTypes[j][i].charsetNumber = csNumsIn[j];
queryColTypes[j][i].scale = scaleIn[j];
queryColTypes[j][i].precision = precisionIn[j];
queryColTypes[j][i].colWidth = rg.getColumnWidth(j);
@ -4054,6 +4074,7 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo&
{
CalpontSystemCatalog::ColType colType = DataConvert::convertUnionColType(queryColTypes[j]);
types.push_back(colType.colDataType);
csNums.push_back(colType.charsetNumber);
scale.push_back(colType.scale);
precision.push_back(colType.precision);
width.push_back(colType.colWidth);
@ -4067,7 +4088,7 @@ SJSTEP unionQueries(JobStepVector& queries, uint64_t distinctUnionNum, JobInfo&
unionStep->setInputRowGroups(inputRGs);
unionStep->setDistinctFlags(distinct);
unionStep->setOutputRowGroup(RowGroup(oids.size(), pos, oids, keys, types, scale, precision, jobInfo.stringTableThreshold));
unionStep->setOutputRowGroup(RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, jobInfo.stringTableThreshold));
// Fix for bug 4388 adjusts the result type at connector side, this workaround is obsolete.
// bug 3067, update the returned column types.