You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-10-31 18:30:33 +03:00
feat(rbo,rules,QA): index column type is now derived from the corresponding Field
This commit is contained in:
@@ -207,6 +207,7 @@ SimpleColumn::SimpleColumn(const SimpleColumn& rhs, const uint32_t sessionID)
|
||||
, fTimeZone(rhs.timeZone())
|
||||
, fisColumnStore(rhs.isColumnStore())
|
||||
{
|
||||
fResultType = rhs.resultType();
|
||||
}
|
||||
|
||||
SimpleColumn::SimpleColumn(const ReturnedColumn& rhs, const uint32_t sessionID)
|
||||
@@ -250,6 +251,7 @@ SimpleColumn& SimpleColumn::operator=(const SimpleColumn& rhs)
|
||||
fDistinct = rhs.distinct();
|
||||
fisColumnStore = rhs.isColumnStore();
|
||||
fPartitions = rhs.fPartitions;
|
||||
fResultType = rhs.resultType();
|
||||
}
|
||||
|
||||
return *this;
|
||||
|
||||
@@ -2651,6 +2651,39 @@ CalpontSystemCatalog::ColType colType_MysqlToIDB(const Item* item)
|
||||
return ct;
|
||||
}
|
||||
|
||||
// Simplified version to support QA-specific RBO re-write rule.
|
||||
// TBD turn into template to merge with colType_MysqlToIDB for Item
|
||||
CalpontSystemCatalog::ColType colType_MysqlToIDB(const Field* field)
|
||||
{
|
||||
CalpontSystemCatalog::ColType ct;
|
||||
ct.precision = 4;
|
||||
|
||||
switch (field->result_type())
|
||||
{
|
||||
case INT_RESULT:
|
||||
if (field->is_unsigned())
|
||||
{
|
||||
ct.colDataType = CalpontSystemCatalog::UBIGINT;
|
||||
}
|
||||
else
|
||||
{
|
||||
ct.colDataType = CalpontSystemCatalog::BIGINT;
|
||||
}
|
||||
|
||||
ct.colWidth = 8;
|
||||
break;
|
||||
|
||||
case STRING_RESULT:
|
||||
ct.colDataType = CalpontSystemCatalog::VARCHAR;
|
||||
|
||||
default:
|
||||
IDEBUG(cerr << "colType_MysqlToIDB:: Unknown result type of MySQL " << item->result_type() << endl);
|
||||
break;
|
||||
}
|
||||
ct.charsetNumber = field->charset()->number;
|
||||
return ct;
|
||||
}
|
||||
|
||||
bool itemDisablesWrapping(Item* item, gp_walk_info& gwi)
|
||||
{
|
||||
if (gwi.select_lex == nullptr)
|
||||
@@ -4184,6 +4217,62 @@ SimpleColumn* buildSimpleColumn(Item_field* ifp, gp_walk_info& gwi)
|
||||
return sc;
|
||||
}
|
||||
|
||||
SimpleColumn* buildSimpleColumnFromFieldForStatistics(Field* field, gp_walk_info& gwi)
|
||||
{
|
||||
if (!gwi.csc)
|
||||
{
|
||||
gwi.csc = CalpontSystemCatalog::makeCalpontSystemCatalog(gwi.sessionid);
|
||||
gwi.csc->identity(CalpontSystemCatalog::FE);
|
||||
}
|
||||
|
||||
CalpontSystemCatalog::ColType ct;
|
||||
datatypes::SimpleColumnParam prm(gwi.sessionid, true);
|
||||
|
||||
try
|
||||
{
|
||||
// check foreign engine
|
||||
if (field->table)
|
||||
prm.columnStore(ha_mcs_common::isMCSTable(field->table));
|
||||
|
||||
if (prm.columnStore())
|
||||
{
|
||||
ct = gwi.csc->colType(gwi.csc->lookupOID(
|
||||
make_tcn(field->table->s->db.str, field->table->s->table_name.str, field->field_name.str)));
|
||||
}
|
||||
else
|
||||
{
|
||||
ct = colType_MysqlToIDB(field);
|
||||
}
|
||||
}
|
||||
catch (std::exception& ex)
|
||||
{
|
||||
gwi.fatalParseError = true;
|
||||
gwi.parseErrorText = ex.what();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const datatypes::DatabaseQualifiedColumnName name(field->table->s->db.str, field->table->s->table_name.str,
|
||||
field->field_name.str);
|
||||
const datatypes::TypeHandler* h = ct.typeHandler();
|
||||
SimpleColumn* sc = h->newSimpleColumn(name, ct, prm);
|
||||
|
||||
sc->resultType(ct);
|
||||
sc->charsetNumber(field->charset()->number);
|
||||
string tbname(field->table->s->table_name.str);
|
||||
|
||||
// Note: differs with the original buildSimpleColumn
|
||||
sc->tableAlias(field->table->alias.c_ptr(), lower_case_table_names);
|
||||
|
||||
sc->alias(field->field_name.str);
|
||||
sc->isColumnStore(prm.columnStore());
|
||||
sc->timeZone(gwi.timeZone);
|
||||
|
||||
sc->oid(field->field_index + 1); // ExeMgr requires offset started from 1
|
||||
// TODO add partitions support here
|
||||
|
||||
return sc;
|
||||
}
|
||||
|
||||
ParseTree* buildParseTree(Item* item, gp_walk_info& gwi, bool& /*nonSupport*/)
|
||||
{
|
||||
ParseTree* pt = 0;
|
||||
@@ -5215,7 +5304,6 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP&
|
||||
{
|
||||
for (uint j = 0; j < table_ptr->table->s->keys; j++)
|
||||
{
|
||||
// for (uint i = 0; i < table_ptr->table->s->key_info[j].usable_key_parts; i++)
|
||||
{
|
||||
Field* field = table_ptr->table->key_info[j].key_part[0].field;
|
||||
std::cout << "j index " << j << " i column " << 0 << " fieldnr "
|
||||
@@ -5225,28 +5313,29 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP&
|
||||
auto* histogram = dynamic_cast<Histogram_json_hb*>(field->read_stats->histogram);
|
||||
if (histogram)
|
||||
{
|
||||
std::cout << " has stats ";
|
||||
SchemaAndTableName tableName = {field->table->s->db.str,
|
||||
field->table->s->table_name.str};
|
||||
execplan::SimpleColumn simpleColumn = {field->table->s->db.str,
|
||||
field->table->s->table_name.str,
|
||||
field->field_name.str};
|
||||
std::cout << " has stats with " << histogram->buckets.size() << " buckets";
|
||||
SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str};
|
||||
auto* sc = buildSimpleColumnFromFieldForStatistics(field, gwi);
|
||||
std::cout << "sc with stats !!!!! " << sc->toString() << std::endl;
|
||||
// execplan::SimpleColumn simpleColumn = {
|
||||
// field->table->s->db.str, field->table->s->table_name.str, field->field_name.str, false};
|
||||
|
||||
auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName);
|
||||
if (tableStatisticsMapIt == gwi.tableStatisticsMap.end())
|
||||
{
|
||||
gwi.tableStatisticsMap[tableName][field->field_name.str] = {simpleColumn, {*histogram}};
|
||||
gwi.tableStatisticsMap[tableName][field->field_name.str] = {*sc, {histogram}};
|
||||
}
|
||||
else
|
||||
{
|
||||
auto columnStatisticsMapIt = tableStatisticsMapIt->second.find(field->field_name.str);
|
||||
if (columnStatisticsMapIt == tableStatisticsMapIt->second.end())
|
||||
{
|
||||
tableStatisticsMapIt->second[field->field_name.str] = {simpleColumn, {*histogram}};
|
||||
tableStatisticsMapIt->second[field->field_name.str] = {*sc, {histogram}};
|
||||
}
|
||||
else
|
||||
{
|
||||
auto columnStatisticsVec = columnStatisticsMapIt->second.second;
|
||||
columnStatisticsVec.push_back(*histogram);
|
||||
columnStatisticsVec.push_back(histogram);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -116,7 +116,7 @@ typedef std::map<execplan::CalpontSystemCatalog::TableAliasName, std::pair<int,
|
||||
typedef std::tr1::unordered_map<TABLE_LIST*, std::vector<COND*>> TableOnExprList;
|
||||
typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
|
||||
using ColumnName = std::string;
|
||||
using ColumnStatisticsMap = std::unordered_map<ColumnName, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb>>>;
|
||||
using ColumnStatisticsMap = std::unordered_map<ColumnName, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>;
|
||||
using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
|
||||
|
||||
// This structure is used to store MDB AST -> CSEP translation context.
|
||||
|
||||
@@ -64,7 +64,8 @@ bool someAreForeignTables(execplan::CalpontSelectExecutionPlan& csep)
|
||||
[](const auto& table) { return !table.isColumnstore(); });
|
||||
}
|
||||
|
||||
bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx)
|
||||
bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPlan& csep,
|
||||
optimizer::RBOptimizerContext& ctx)
|
||||
{
|
||||
return std::any_of(
|
||||
csep.tableList().begin(), csep.tableList().end(),
|
||||
@@ -100,12 +101,13 @@ execplan::ParseTree* filtersWithNewRange(execplan::SCSEP& csep, execplan::Simple
|
||||
// There is a question with ownership of the const column
|
||||
// TODO here we lost upper bound value if predicate is not changed to weak lt
|
||||
execplan::SOP ltOp = (isLast) ? boost::make_shared<execplan::Operator>(execplan::PredicateOperator("<="))
|
||||
: boost::make_shared<execplan::Operator>(execplan::PredicateOperator("<"));
|
||||
: boost::make_shared<execplan::Operator>(execplan::PredicateOperator("<"));
|
||||
ltOp->setOpType(filterColLeftOp->resultType(), tableKeyColumnLeftOp->resultType());
|
||||
ltOp->resultType(ltOp->operationType());
|
||||
|
||||
auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp);
|
||||
// TODO new
|
||||
// TODO new
|
||||
// TODO remove new and re-use tableKeyColumnLeftOp
|
||||
auto tableKeyColumnRightOp = new execplan::SimpleColumn(column);
|
||||
tableKeyColumnRightOp->resultType(column.resultType());
|
||||
// TODO hardcoded column type and value
|
||||
@@ -173,63 +175,74 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl
|
||||
}
|
||||
|
||||
// TBD
|
||||
Histogram_json_hb& chooseStatisticsToUse(std::vector<Histogram_json_hb>& columnStatisticsVec)
|
||||
Histogram_json_hb* chooseStatisticsToUse(std::vector<Histogram_json_hb*>& columnStatisticsVec)
|
||||
{
|
||||
return columnStatisticsVec.front();
|
||||
}
|
||||
|
||||
// Looking for a projected column that comes first in an available index and has EI statistics
|
||||
// INV nullptr signifies that no suitable column was found
|
||||
std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyColumnAndStatistics(
|
||||
execplan::CalpontSystemCatalog::TableAliasName& targetTable, optimizer::RBOptimizerContext& ctx)
|
||||
{
|
||||
cal_impl_if::SchemaAndTableName schemaAndTableName = {targetTable.schema, targetTable.table};
|
||||
|
||||
auto tableColumnsStatisticsIt = ctx.gwi.tableStatisticsMap.find(schemaAndTableName);
|
||||
if (tableColumnsStatisticsIt == ctx.gwi.tableStatisticsMap.end() ||
|
||||
tableColumnsStatisticsIt->second.empty())
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// TODO take some column and some stats for it!!!
|
||||
for (auto& [columnName, scAndStatisticsVec] : tableColumnsStatisticsIt->second)
|
||||
{
|
||||
auto& [sc, columnStatisticsVec] = scAndStatisticsVec;
|
||||
auto* columnStatistics = chooseStatisticsToUse(columnStatisticsVec);
|
||||
return {{sc, columnStatistics}};
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Populates range bounds based on column statistics
|
||||
// Returns optional with bounds if successful, nullopt otherwise
|
||||
template <typename T>
|
||||
std::optional<FilterRangeBounds<T>> populateRangeBounds(execplan::SimpleColumn* keyColumn,
|
||||
optimizer::RBOptimizerContext& ctx)
|
||||
std::optional<FilterRangeBounds<T>> populateRangeBounds(Histogram_json_hb* columnStatistics)
|
||||
{
|
||||
cal_impl_if::SchemaAndTableName schemaAndTableName = {keyColumn->schemaName(), keyColumn->tableName()};
|
||||
auto tableColumnsStatisticsIt = ctx.gwi.tableStatisticsMap.find(schemaAndTableName);
|
||||
if (tableColumnsStatisticsIt == ctx.gwi.tableStatisticsMap.end())
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto columnStatisticsIt = tableColumnsStatisticsIt->second.find(keyColumn->columnName());
|
||||
if (columnStatisticsIt == tableColumnsStatisticsIt->second.end())
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto& [simpleColumn, columnStatisticsVec] = columnStatisticsIt->second;
|
||||
auto& columnStatistics = chooseStatisticsToUse(columnStatisticsVec);
|
||||
FilterRangeBounds<T> bounds;
|
||||
|
||||
// TODO configurable parallel factor via session variable
|
||||
// NB now histogram size is the way to control parallel factor with 16 being the maximum
|
||||
size_t numberOfUnionUnits = std::min(columnStatistics.get_json_histogram().size(), MaxParallelFactor);
|
||||
size_t numberOfBucketsPerUnionUnit = columnStatistics.get_json_histogram().size() / numberOfUnionUnits;
|
||||
std::cout << "populateRangeBounds() columnStatistics->buckets.size() " << columnStatistics->get_json_histogram().size()
|
||||
<< std::endl;
|
||||
size_t numberOfUnionUnits = std::min(columnStatistics->get_json_histogram().size(), MaxParallelFactor);
|
||||
size_t numberOfBucketsPerUnionUnit = columnStatistics->get_json_histogram().size() / numberOfUnionUnits;
|
||||
|
||||
std::cout << "Number of union units: " << numberOfUnionUnits << std::endl;
|
||||
std::cout << "Number of buckets per union unit: " << numberOfBucketsPerUnionUnit << std::endl;
|
||||
|
||||
FilterRangeBounds<T> bounds;
|
||||
|
||||
// Loop over buckets to produce filter ranges
|
||||
// NB Currently Histogram_json_hb has the last bucket that has end as its start
|
||||
for (size_t i = 0; i < numberOfUnionUnits - 1; ++i)
|
||||
{
|
||||
auto bucket = columnStatistics.get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit;
|
||||
auto endBucket = columnStatistics.get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit;
|
||||
auto bucket = columnStatistics->get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit;
|
||||
auto endBucket = columnStatistics->get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit;
|
||||
T currentLowerBound = *(uint32_t*)bucket->start_value.data();
|
||||
T currentUpperBound = *(uint32_t*)endBucket->start_value.data();
|
||||
bounds.push_back({currentLowerBound, currentUpperBound});
|
||||
}
|
||||
for (auto& bucket : columnStatistics.get_json_histogram())
|
||||
for (auto& bucket : columnStatistics->get_json_histogram())
|
||||
{
|
||||
T currentLowerBound = *(uint32_t*)bucket.start_value.data();
|
||||
std::cout << "Bucket: " << currentLowerBound << std::endl;
|
||||
}
|
||||
// auto penultimateBucket = columnStatistics.get_json_histogram().begin() + numberOfUnionUnits * numberOfBucketsPerUnionUnit;
|
||||
// T currentLowerBound = *(uint32_t*)penultimateBucket->start_value.data();
|
||||
// T currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data();
|
||||
// auto penultimateBucket = columnStatistics.get_json_histogram().begin() + numberOfUnionUnits *
|
||||
// numberOfBucketsPerUnionUnit; T currentLowerBound = *(uint32_t*)penultimateBucket->start_value.data(); T
|
||||
// currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data();
|
||||
// bounds.push_back({currentLowerBound, currentUpperBound});
|
||||
|
||||
|
||||
for (auto& bound : bounds)
|
||||
{
|
||||
std::cout << "Bound: " << bound.first << " " << bound.second << std::endl;
|
||||
@@ -247,14 +260,19 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
||||
|
||||
// SC type controls an integral type used to produce suitable filters. The continuation of this function
|
||||
// should become a template function based on SC type.
|
||||
execplan::SimpleColumn* keyColumn = findSuitableKeyColumn(csep, table, ctx);
|
||||
if (!keyColumn)
|
||||
auto keyColumnAndStatistics = chooseKeyColumnAndStatistics(table, ctx);
|
||||
if (!keyColumnAndStatistics)
|
||||
{
|
||||
return unionVec;
|
||||
}
|
||||
|
||||
auto& [keyColumn, columnStatistics] = keyColumnAndStatistics.value();
|
||||
|
||||
std::cout << "makeUnionFromTable keyColumn " << keyColumn.toString() << std::endl;
|
||||
std::cout << "makeUnionFromTable RC front " << csep.returnedCols().front()->toString() << std::endl;
|
||||
|
||||
// TODO char and other numerical types support
|
||||
auto boundsOpt = populateRangeBounds<uint64_t>(keyColumn, ctx);
|
||||
auto boundsOpt = populateRangeBounds<uint64_t>(columnStatistics);
|
||||
if (!boundsOpt.has_value())
|
||||
{
|
||||
return unionVec;
|
||||
@@ -269,15 +287,24 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
||||
{
|
||||
auto clonedCSEP = csep.cloneForTableWORecursiveSelects(table);
|
||||
// Add BETWEEN based on key column range
|
||||
clonedCSEP->filters(filtersWithNewRange(clonedCSEP, *keyColumn, bounds[i], false));
|
||||
auto filter = filtersWithNewRange(clonedCSEP, keyColumn, bounds[i], false);
|
||||
clonedCSEP->filters(filter);
|
||||
// To create CES filter we need to have a column in the column map
|
||||
clonedCSEP->columnMap().insert({keyColumn.columnName(), execplan::SRCP(keyColumn.clone())});
|
||||
unionVec.push_back(clonedCSEP);
|
||||
}
|
||||
}
|
||||
// This last bound produces low <= col <= high
|
||||
auto clonedCSEP = csep.cloneForTableWORecursiveSelects(table);
|
||||
clonedCSEP->filters(filtersWithNewRange(clonedCSEP, *keyColumn, bounds.back(), true));
|
||||
unionVec.push_back(clonedCSEP);
|
||||
|
||||
// TODO add NULLs into filter of the last step
|
||||
if (!bounds.empty())
|
||||
{
|
||||
auto clonedCSEP = csep.cloneForTableWORecursiveSelects(table);
|
||||
auto filter = filtersWithNewRange(clonedCSEP, keyColumn, bounds.back(), true);
|
||||
clonedCSEP->columnMap().insert({keyColumn.columnName(), execplan::SRCP(keyColumn.clone())});
|
||||
clonedCSEP->filters(filter);
|
||||
unionVec.push_back(clonedCSEP);
|
||||
}
|
||||
|
||||
return unionVec;
|
||||
}
|
||||
bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx)
|
||||
|
||||
Reference in New Issue
Block a user