You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-10-31 18:30:33 +03:00
chore(rbo,rules,QA): new extractColumnStatistics, some comments, and a bit of refactoring.
This commit is contained in:
@@ -2673,8 +2673,7 @@ CalpontSystemCatalog::ColType colType_MysqlToIDB(const Field* field)
|
|||||||
ct.colWidth = 8;
|
ct.colWidth = 8;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case STRING_RESULT:
|
case STRING_RESULT: ct.colDataType = CalpontSystemCatalog::VARCHAR;
|
||||||
ct.colDataType = CalpontSystemCatalog::VARCHAR;
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
IDEBUG(cerr << "colType_MysqlToIDB:: Unknown result type of MySQL " << item->result_type() << endl);
|
IDEBUG(cerr << "colType_MysqlToIDB:: Unknown result type of MySQL " << item->result_type() << endl);
|
||||||
@@ -5205,6 +5204,64 @@ void setExecutionParams(gp_walk_info& gwi, SCSEP& csep)
|
|||||||
csep->umMemLimit(get_um_mem_limit(gwi.thd) * 1024ULL * 1024);
|
csep->umMemLimit(get_um_mem_limit(gwi.thd) * 1024ULL * 1024);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Loop over available indexes to find and extract corresponding EI column statistics
|
||||||
|
// for the first column of the index if any.
|
||||||
|
// Statistics is stored in GWI context.
|
||||||
|
// Mock for ES 10.6
|
||||||
|
// TODO clean up extra logging when the feature is ready
|
||||||
|
#if MYSQL_VERSION_ID >= 110401
|
||||||
|
void extractColumnStatistics(TABLE_LIST* table_ptr, gp_walk_info& gwi)
|
||||||
|
{
|
||||||
|
for (uint j = 0; j < table_ptr->table->s->keys; j++)
|
||||||
|
{
|
||||||
|
{
|
||||||
|
Field* field = table_ptr->table->key_info[j].key_part[0].field;
|
||||||
|
std::cout << "j index " << j << " i column " << 0 << " fieldnr "
|
||||||
|
<< table_ptr->table->key_info[j].key_part[0].fieldnr << " " << field->field_name.str;
|
||||||
|
if (field->read_stats)
|
||||||
|
{
|
||||||
|
auto* histogram = dynamic_cast<Histogram_json_hb*>(field->read_stats->histogram);
|
||||||
|
if (histogram)
|
||||||
|
{
|
||||||
|
std::cout << " has stats with " << histogram->buckets.size() << " buckets";
|
||||||
|
SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str};
|
||||||
|
auto* sc = buildSimpleColumnFromFieldForStatistics(field, gwi);
|
||||||
|
std::cout << "sc with stats !!!!! " << sc->toString() << std::endl;
|
||||||
|
|
||||||
|
auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName);
|
||||||
|
if (tableStatisticsMapIt == gwi.tableStatisticsMap.end())
|
||||||
|
{
|
||||||
|
gwi.tableStatisticsMap[tableName][field->field_name.str] = {*sc, {histogram}};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto columnStatisticsMapIt = tableStatisticsMapIt->second.find(field->field_name.str);
|
||||||
|
if (columnStatisticsMapIt == tableStatisticsMapIt->second.end())
|
||||||
|
{
|
||||||
|
tableStatisticsMapIt->second[field->field_name.str] = {*sc, {histogram}};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto columnStatisticsVec = columnStatisticsMapIt->second.second;
|
||||||
|
columnStatisticsVec.push_back(histogram);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::cout << " no stats ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
void extractColumnStatistics(Item_field* /*ifp*/, gp_walk_info& /*gwi*/)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*@brief Process FROM part of the query or sub-query */
|
/*@brief Process FROM part of the query or sub-query */
|
||||||
/***********************************************************
|
/***********************************************************
|
||||||
* DESCRIPTION:
|
* DESCRIPTION:
|
||||||
@@ -5302,51 +5359,8 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP&
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (uint j = 0; j < table_ptr->table->s->keys; j++)
|
// TODO move extractColumnStatistics up when statistics is supported in MCS
|
||||||
{
|
extractColumnStatistics(table_ptr, gwi);
|
||||||
{
|
|
||||||
Field* field = table_ptr->table->key_info[j].key_part[0].field;
|
|
||||||
std::cout << "j index " << j << " i column " << 0 << " fieldnr "
|
|
||||||
<< table_ptr->table->key_info[j].key_part[0].fieldnr << " " << field->field_name.str;
|
|
||||||
if (field->read_stats)
|
|
||||||
{
|
|
||||||
auto* histogram = dynamic_cast<Histogram_json_hb*>(field->read_stats->histogram);
|
|
||||||
if (histogram)
|
|
||||||
{
|
|
||||||
std::cout << " has stats with " << histogram->buckets.size() << " buckets";
|
|
||||||
SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str};
|
|
||||||
auto* sc = buildSimpleColumnFromFieldForStatistics(field, gwi);
|
|
||||||
std::cout << "sc with stats !!!!! " << sc->toString() << std::endl;
|
|
||||||
// execplan::SimpleColumn simpleColumn = {
|
|
||||||
// field->table->s->db.str, field->table->s->table_name.str, field->field_name.str, false};
|
|
||||||
|
|
||||||
auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName);
|
|
||||||
if (tableStatisticsMapIt == gwi.tableStatisticsMap.end())
|
|
||||||
{
|
|
||||||
gwi.tableStatisticsMap[tableName][field->field_name.str] = {*sc, {histogram}};
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
auto columnStatisticsMapIt = tableStatisticsMapIt->second.find(field->field_name.str);
|
|
||||||
if (columnStatisticsMapIt == tableStatisticsMapIt->second.end())
|
|
||||||
{
|
|
||||||
tableStatisticsMapIt->second[field->field_name.str] = {*sc, {histogram}};
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
auto columnStatisticsVec = columnStatisticsMapIt->second.second;
|
|
||||||
columnStatisticsVec.push_back(histogram);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::cout << " no stats ";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
string table_name = table_ptr->table_name.str;
|
string table_name = table_ptr->table_name.str;
|
||||||
|
|
||||||
@@ -6557,7 +6571,6 @@ int processSelect(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, vector
|
|||||||
case Item::FIELD_ITEM:
|
case Item::FIELD_ITEM:
|
||||||
{
|
{
|
||||||
Item_field* ifp = (Item_field*)item;
|
Item_field* ifp = (Item_field*)item;
|
||||||
// extractColumnStatistics(ifp, gwi);
|
|
||||||
// Handle * case
|
// Handle * case
|
||||||
if (ifp->field_name.length && string(ifp->field_name.str) == "*")
|
if (ifp->field_name.length && string(ifp->field_name.str) == "*")
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -79,11 +79,8 @@ bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPl
|
|||||||
|
|
||||||
/// @brief Rule gate: decide whether the parallel-CES rewrite applies to this plan.
/// True only when the plan references at least one foreign (non-Columnstore)
/// table AND some foreign table has usable statistics plus an MB index.
// TODO filter out CSEPs with orderBy, groupBy, having || or clean up OB,GB,HAVING cloning CSEP
// Filter out tables that were re-written.
bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx)
{
  // Cheap structural check first; the statistics lookup only runs when it passes
  // (preserves the original short-circuit order).
  const bool hasForeignTables = someAreForeignTables(csep);
  if (!hasForeignTables)
  {
    return false;
  }
  return someForeignTablesHasStatisticsAndMbIndex(csep, ctx);
}
||||||
|
|
||||||
@@ -106,7 +103,7 @@ execplan::ParseTree* filtersWithNewRange(execplan::SCSEP& csep, execplan::Simple
|
|||||||
ltOp->resultType(ltOp->operationType());
|
ltOp->resultType(ltOp->operationType());
|
||||||
|
|
||||||
auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp);
|
auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp);
|
||||||
// TODO new
|
// TODO new
|
||||||
// TODO remove new and re-use tableKeyColumnLeftOp
|
// TODO remove new and re-use tableKeyColumnLeftOp
|
||||||
auto tableKeyColumnRightOp = new execplan::SimpleColumn(column);
|
auto tableKeyColumnRightOp = new execplan::SimpleColumn(column);
|
||||||
tableKeyColumnRightOp->resultType(column.resultType());
|
tableKeyColumnRightOp->resultType(column.resultType());
|
||||||
@@ -214,8 +211,8 @@ std::optional<FilterRangeBounds<T>> populateRangeBounds(Histogram_json_hb* colum
|
|||||||
|
|
||||||
// TODO configurable parallel factor via session variable
|
// TODO configurable parallel factor via session variable
|
||||||
// NB now histogram size is the way to control parallel factor with 16 being the maximum
|
// NB now histogram size is the way to control parallel factor with 16 being the maximum
|
||||||
std::cout << "populateRangeBounds() columnStatistics->buckets.size() " << columnStatistics->get_json_histogram().size()
|
std::cout << "populateRangeBounds() columnStatistics->buckets.size() "
|
||||||
<< std::endl;
|
<< columnStatistics->get_json_histogram().size() << std::endl;
|
||||||
size_t numberOfUnionUnits = std::min(columnStatistics->get_json_histogram().size(), MaxParallelFactor);
|
size_t numberOfUnionUnits = std::min(columnStatistics->get_json_histogram().size(), MaxParallelFactor);
|
||||||
size_t numberOfBucketsPerUnionUnit = columnStatistics->get_json_histogram().size() / numberOfUnionUnits;
|
size_t numberOfBucketsPerUnionUnit = columnStatistics->get_json_histogram().size() / numberOfUnionUnits;
|
||||||
|
|
||||||
@@ -237,12 +234,12 @@ std::optional<FilterRangeBounds<T>> populateRangeBounds(Histogram_json_hb* colum
|
|||||||
T currentLowerBound = *(uint32_t*)bucket.start_value.data();
|
T currentLowerBound = *(uint32_t*)bucket.start_value.data();
|
||||||
std::cout << "Bucket: " << currentLowerBound << std::endl;
|
std::cout << "Bucket: " << currentLowerBound << std::endl;
|
||||||
}
|
}
|
||||||
|
// TODO leave this here b/c there is a corresponding JIRA about the last upper range bound.
|
||||||
// auto penultimateBucket = columnStatistics.get_json_histogram().begin() + numberOfUnionUnits *
|
// auto penultimateBucket = columnStatistics.get_json_histogram().begin() + numberOfUnionUnits *
|
||||||
// numberOfBucketsPerUnionUnit; T currentLowerBound = *(uint32_t*)penultimateBucket->start_value.data(); T
|
// numberOfBucketsPerUnionUnit; T currentLowerBound = *(uint32_t*)penultimateBucket->start_value.data(); T
|
||||||
// currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data();
|
// currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data();
|
||||||
// bounds.push_back({currentLowerBound, currentUpperBound});
|
// bounds.push_back({currentLowerBound, currentUpperBound});
|
||||||
|
|
||||||
|
|
||||||
for (auto& bound : bounds)
|
for (auto& bound : bounds)
|
||||||
{
|
{
|
||||||
std::cout << "Bound: " << bound.first << " " << bound.second << std::endl;
|
std::cout << "Bound: " << bound.first << " " << bound.second << std::endl;
|
||||||
@@ -304,7 +301,7 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
|||||||
clonedCSEP->filters(filter);
|
clonedCSEP->filters(filter);
|
||||||
unionVec.push_back(clonedCSEP);
|
unionVec.push_back(clonedCSEP);
|
||||||
}
|
}
|
||||||
|
|
||||||
return unionVec;
|
return unionVec;
|
||||||
}
|
}
|
||||||
bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx)
|
bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx)
|
||||||
|
|||||||
Reference in New Issue
Block a user