You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-11-21 09:20:51 +03:00
chore(QA,plugin): integer PK columns are supported by QA.
This commit is contained in:
@@ -333,6 +333,7 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, long t
|
|||||||
if (ti.tpl_scan_ctx->rowsreturned == 0 &&
|
if (ti.tpl_scan_ctx->rowsreturned == 0 &&
|
||||||
(ti.tpl_scan_ctx->traceFlags & execplan::CalpontSelectExecutionPlan::TRACE_TUPLE_OFF))
|
(ti.tpl_scan_ctx->traceFlags & execplan::CalpontSelectExecutionPlan::TRACE_TUPLE_OFF))
|
||||||
{
|
{
|
||||||
|
std::cout << "rowGroup->toString() " << rowGroup->toString() << std::endl;
|
||||||
for (uint32_t i = 0; i < rowGroup->getColumnCount(); i++)
|
for (uint32_t i = 0; i < rowGroup->getColumnCount(); i++)
|
||||||
{
|
{
|
||||||
int oid = rowGroup->getOIDs()[i];
|
int oid = rowGroup->getOIDs()[i];
|
||||||
|
|||||||
@@ -122,8 +122,9 @@ typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
|
|||||||
|
|
||||||
struct ColumnStatistics
|
struct ColumnStatistics
|
||||||
{
|
{
|
||||||
ColumnStatistics(execplan::SimpleColumn& column, std::vector<Histogram_json_hb*> histograms)
|
ColumnStatistics(execplan::SimpleColumn& column, std::vector<Histogram_json_hb*> histograms,
|
||||||
: column(column), histograms(histograms)
|
Field* minValue, Field* maxValue)
|
||||||
|
: column(column), histograms(histograms), minValue(minValue), maxValue(maxValue)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
ColumnStatistics() = default;
|
ColumnStatistics() = default;
|
||||||
@@ -133,15 +134,33 @@ struct ColumnStatistics
|
|||||||
return histograms;
|
return histograms;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const Histogram_json_hb* getHistogram() const
|
||||||
|
{
|
||||||
|
if (histograms.empty())
|
||||||
|
return nullptr;
|
||||||
|
return histograms.front();
|
||||||
|
}
|
||||||
|
|
||||||
execplan::SimpleColumn& getColumn()
|
execplan::SimpleColumn& getColumn()
|
||||||
{
|
{
|
||||||
return column;
|
return column;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<int64_t> getIntMinValue() const
|
||||||
|
{
|
||||||
|
return (minValue) ? std::optional<int64_t>(minValue->val_int()) : std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<int64_t> getIntMaxValue() const
|
||||||
|
{
|
||||||
|
return (maxValue) ? std::optional<int64_t>(maxValue->val_int()) : std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
execplan::SimpleColumn column;
|
execplan::SimpleColumn column;
|
||||||
std::vector<Histogram_json_hb*> histograms;
|
std::vector<Histogram_json_hb*> histograms;
|
||||||
Field* min{nullptr};
|
Field* minValue{nullptr};
|
||||||
Field* max{nullptr};
|
Field* maxValue{nullptr};
|
||||||
};
|
};
|
||||||
|
|
||||||
using ColumnName = std::string;
|
using ColumnName = std::string;
|
||||||
@@ -162,14 +181,23 @@ struct TableStatistics
|
|||||||
auto tableStatisticsIt = tableStatistics_.find(tableName);
|
auto tableStatisticsIt = tableStatistics_.find(tableName);
|
||||||
if (tableStatisticsIt == tableStatistics_.end())
|
if (tableStatisticsIt == tableStatistics_.end())
|
||||||
{
|
{
|
||||||
tableStatistics_[tableName][fieldName] = {sc, {histogram}};
|
if (histogram)
|
||||||
|
{
|
||||||
|
tableStatistics_[tableName][fieldName] = {
|
||||||
|
sc, {histogram}, statistics->min_value, statistics->max_value};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
tableStatistics_[tableName][fieldName] = {sc, {}, statistics->min_value, statistics->max_value};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
auto columnStatisticsMapIt = tableStatisticsIt->second.find(fieldName);
|
auto columnStatisticsMapIt = tableStatisticsIt->second.find(fieldName);
|
||||||
if (columnStatisticsMapIt == tableStatisticsIt->second.end())
|
if (columnStatisticsMapIt == tableStatisticsIt->second.end())
|
||||||
{
|
{
|
||||||
tableStatisticsIt->second[fieldName] = {sc, {histogram}};
|
tableStatisticsIt->second[fieldName] = {
|
||||||
|
sc, {histogram}, statistics->min_value, statistics->max_value};
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -202,6 +230,8 @@ struct TableStatistics
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
// Note: This algo overwrites histograms but shouldn't be a problem b/c
|
||||||
|
// statistics can't change.
|
||||||
for (auto& [columnName, histogram] : aColumnStatisticsMap)
|
for (auto& [columnName, histogram] : aColumnStatisticsMap)
|
||||||
{
|
{
|
||||||
tableStatisticsIt->second[columnName] = histogram;
|
tableStatisticsIt->second[columnName] = histogram;
|
||||||
@@ -210,6 +240,7 @@ struct TableStatistics
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
TableStatisticsMap tableStatistics_;
|
TableStatisticsMap tableStatistics_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -216,15 +216,9 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TBD
|
|
||||||
Histogram_json_hb* chooseStatisticsToUse(std::vector<Histogram_json_hb*>& columnStatisticsVec)
|
|
||||||
{
|
|
||||||
return columnStatisticsVec.front();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Looking for a projected column that comes first in an available index and has EI statistics
|
// Looking for a projected column that comes first in an available index and has EI statistics
|
||||||
// INV nullptr signifies that no suitable column was found
|
// INV nullptr signifies that no suitable column was found
|
||||||
std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyColumnAndStatistics(
|
cal_impl_if::ColumnStatistics* chooseKeyColumnAndStatistics(
|
||||||
execplan::CalpontSystemCatalog::TableAliasName& targetTable, optimizer::RBOptimizerContext& ctx)
|
execplan::CalpontSystemCatalog::TableAliasName& targetTable, optimizer::RBOptimizerContext& ctx)
|
||||||
{
|
{
|
||||||
cal_impl_if::SchemaAndTableName schemaAndTableName = {targetTable.schema, targetTable.table};
|
cal_impl_if::SchemaAndTableName schemaAndTableName = {targetTable.schema, targetTable.table};
|
||||||
@@ -232,19 +226,17 @@ std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyC
|
|||||||
auto tableColumnsStatistics = ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName);
|
auto tableColumnsStatistics = ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName);
|
||||||
if (!tableColumnsStatistics)
|
if (!tableColumnsStatistics)
|
||||||
{
|
{
|
||||||
return std::nullopt;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO take some column and some stats for it!!!
|
// TODO: this algorithm currently returns the first column and its statistics,
|
||||||
|
// but it should consider all available columns.
|
||||||
for (auto& [columnName, columnStatistics] : tableColumnsStatistics.value())
|
for (auto& [columnName, columnStatistics] : tableColumnsStatistics.value())
|
||||||
{
|
{
|
||||||
auto& sc = columnStatistics.getColumn();
|
return &columnStatistics;
|
||||||
auto& columnStatisticsVec = columnStatistics.getHistograms();
|
|
||||||
auto* bestColumnStatistics = chooseStatisticsToUse(columnStatisticsVec);
|
|
||||||
return {{sc, bestColumnStatistics}};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::nullopt;
|
return nullptr;
|
||||||
}
|
}
|
||||||
} // namespace details
|
} // namespace details
|
||||||
|
|
||||||
@@ -257,37 +249,32 @@ bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RB
|
|||||||
return someAreForeignTables(csep) && someForeignTablesHasStatisticsAndMbIndex(csep, ctx);
|
return someAreForeignTables(csep) && someForeignTablesHasStatisticsAndMbIndex(csep, ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Populates range bounds based on column statistics
|
uint64_t decodeU64(const std::string& bytes)
|
||||||
// Returns optional with bounds if successful, nullopt otherwise
|
|
||||||
template <typename T>
|
|
||||||
std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_hb* columnStatistics,
|
|
||||||
size_t& maxParallelFactor)
|
|
||||||
{
|
{
|
||||||
details::FilterRangeBounds<T> bounds;
|
|
||||||
|
|
||||||
// Guard: empty histogram
|
|
||||||
if (!columnStatistics || columnStatistics->get_json_histogram().empty())
|
|
||||||
return std::nullopt;
|
|
||||||
|
|
||||||
auto decodeU64 = [](const std::string& bytes) -> uint64_t
|
|
||||||
{
|
|
||||||
uint64_t v = 0;
|
uint64_t v = 0;
|
||||||
const size_t n = std::min<size_t>(bytes.size(), sizeof(uint64_t));
|
const size_t n = std::min<size_t>(bytes.size(), sizeof(uint64_t));
|
||||||
if (n)
|
if (n)
|
||||||
std::memcpy(&v, bytes.data(), n);
|
std::memcpy(&v, bytes.data(), n);
|
||||||
return v;
|
return v;
|
||||||
};
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
std::optional<details::FilterRangeBounds<T>> populateRangeBoundsFromHistogram(
|
||||||
|
cal_impl_if::ColumnStatistics& columnStatistics, size_t maxParallelFactor)
|
||||||
|
{
|
||||||
|
details::FilterRangeBounds<T> bounds;
|
||||||
|
auto* histogram = columnStatistics.getHistogram();
|
||||||
|
|
||||||
// Get parallel factor from context
|
// Get parallel factor from context
|
||||||
size_t numberOfUnionUnits = std::min(columnStatistics->get_json_histogram().size(), maxParallelFactor);
|
size_t numberOfUnionUnits = std::min(histogram->get_json_histogram().size(), maxParallelFactor);
|
||||||
size_t numberOfBucketsPerUnionUnit = columnStatistics->get_json_histogram().size() / numberOfUnionUnits;
|
size_t numberOfBucketsPerUnionUnit = histogram->get_json_histogram().size() / numberOfUnionUnits;
|
||||||
|
|
||||||
// Loop over buckets to produce filter ranges
|
// Loop over buckets to produce filter ranges
|
||||||
// NB Currently Histogram_json_hb has the last bucket that has end as its start
|
// NB Currently Histogram_json_hb has the last bucket that has end as its start
|
||||||
for (size_t i = 0; i < numberOfUnionUnits - 1; ++i)
|
for (size_t i = 0; i < numberOfUnionUnits - 1; ++i)
|
||||||
{
|
{
|
||||||
auto bucket = columnStatistics->get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit;
|
auto bucket = histogram->get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit;
|
||||||
auto endBucket = columnStatistics->get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit;
|
auto endBucket = histogram->get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit;
|
||||||
T currentLowerBound = static_cast<T>(decodeU64(bucket->start_value));
|
T currentLowerBound = static_cast<T>(decodeU64(bucket->start_value));
|
||||||
T currentUpperBound = static_cast<T>(decodeU64(endBucket->start_value));
|
T currentUpperBound = static_cast<T>(decodeU64(endBucket->start_value));
|
||||||
bounds.push_back({currentLowerBound, currentUpperBound});
|
bounds.push_back({currentLowerBound, currentUpperBound});
|
||||||
@@ -297,15 +284,15 @@ std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_
|
|||||||
if (numberOfUnionUnits >= 1)
|
if (numberOfUnionUnits >= 1)
|
||||||
{
|
{
|
||||||
auto lastChunkIndex = (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit;
|
auto lastChunkIndex = (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit;
|
||||||
if (lastChunkIndex < columnStatistics->get_json_histogram().size())
|
if (lastChunkIndex < histogram->get_json_histogram().size())
|
||||||
{
|
{
|
||||||
auto lastStartBucket = columnStatistics->get_json_histogram().begin() + lastChunkIndex;
|
auto lastStartBucket = histogram->get_json_histogram().begin() + lastChunkIndex;
|
||||||
T finalLowerBound = static_cast<T>(decodeU64(lastStartBucket->start_value));
|
T finalLowerBound = static_cast<T>(decodeU64(lastStartBucket->start_value));
|
||||||
|
|
||||||
T finalUpperBound = std::numeric_limits<T>::max();
|
T finalUpperBound = std::numeric_limits<T>::max();
|
||||||
if (!columnStatistics->get_last_bucket_end_endp().empty())
|
if (!histogram->get_last_bucket_end_endp().empty())
|
||||||
{
|
{
|
||||||
finalUpperBound = static_cast<T>(decodeU64(columnStatistics->get_last_bucket_end_endp()));
|
finalUpperBound = static_cast<T>(decodeU64(histogram->get_last_bucket_end_endp()));
|
||||||
}
|
}
|
||||||
bounds.push_back({finalLowerBound, finalUpperBound});
|
bounds.push_back({finalLowerBound, finalUpperBound});
|
||||||
}
|
}
|
||||||
@@ -320,6 +307,56 @@ std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_
|
|||||||
return bounds;
|
return bounds;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
std::optional<details::FilterRangeBounds<T>> populateRangeBoundsFromEquallyDistributedRange(
|
||||||
|
cal_impl_if::ColumnStatistics& columnStatistics, size_t maxParallelFactor)
|
||||||
|
{
|
||||||
|
auto minValue = columnStatistics.getIntMinValue().value();
|
||||||
|
auto maxValue = columnStatistics.getIntMaxValue().value();
|
||||||
|
|
||||||
|
assert(maxValue >= minValue);
|
||||||
|
auto distance = maxValue - minValue;
|
||||||
|
auto step = distance / maxParallelFactor;
|
||||||
|
|
||||||
|
details::FilterRangeBounds<T> bounds;
|
||||||
|
for (size_t i = 0; i < maxParallelFactor; ++i)
|
||||||
|
{
|
||||||
|
bounds.push_back({minValue + i * step, minValue + (i + 1) * step});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!bounds.empty())
|
||||||
|
{
|
||||||
|
bounds.front().first = std::numeric_limits<T>::lowest();
|
||||||
|
bounds.back().second = maxValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
return bounds;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Populates range bounds based on column statistics
|
||||||
|
// Returns optional with bounds if successful, nullopt otherwise
|
||||||
|
template <typename T>
|
||||||
|
std::optional<details::FilterRangeBounds<T>> populateRangeBounds(
|
||||||
|
cal_impl_if::ColumnStatistics& columnStatistics, size_t& maxParallelFactor)
|
||||||
|
{
|
||||||
|
auto* histogram = columnStatistics.getHistogram();
|
||||||
|
|
||||||
|
// Guard: empty histogram or no min/max values
|
||||||
|
if (histogram && histogram->get_json_histogram().empty())
|
||||||
|
{
|
||||||
|
return populateRangeBoundsFromHistogram<T>(columnStatistics, maxParallelFactor);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto minValue = columnStatistics.getIntMinValue();
|
||||||
|
auto maxValue = columnStatistics.getIntMaxValue();
|
||||||
|
if (minValue && maxValue)
|
||||||
|
{
|
||||||
|
return populateRangeBoundsFromEquallyDistributedRange<T>(columnStatistics, maxParallelFactor);
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
// TODO char and other numerical types support
|
// TODO char and other numerical types support
|
||||||
execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
||||||
execplan::CalpontSelectExecutionPlan& csep, execplan::CalpontSystemCatalog::TableAliasName& table,
|
execplan::CalpontSelectExecutionPlan& csep, execplan::CalpontSystemCatalog::TableAliasName& table,
|
||||||
@@ -329,15 +366,16 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
|||||||
|
|
||||||
// SC type controls an integral type used to produce suitable filters. The continuation of this function
|
// SC type controls an integral type used to produce suitable filters. The continuation of this function
|
||||||
// should become a template function based on SC type.
|
// should become a template function based on SC type.
|
||||||
auto keyColumnAndStatistics = chooseKeyColumnAndStatistics(table, ctx);
|
auto columnStatisticsPtr = chooseKeyColumnAndStatistics(table, ctx);
|
||||||
if (!keyColumnAndStatistics)
|
if (!columnStatisticsPtr)
|
||||||
{
|
{
|
||||||
return unionVec;
|
return unionVec;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto& [keyColumn, columnStatistics] = keyColumnAndStatistics.value();
|
auto& columnStatistics = *columnStatisticsPtr;
|
||||||
|
auto& keyColumn = columnStatistics.getColumn();
|
||||||
|
|
||||||
std::cout << "makeUnionFromTable keyColumn " << keyColumn.toString() << std::endl;
|
// std::cout << "makeUnionFromTable keyColumn " << keyColumn.toString() << std::endl;
|
||||||
std::cout << "makeUnionFromTable RC front " << csep.returnedCols().front()->toString() << std::endl;
|
std::cout << "makeUnionFromTable RC front " << csep.returnedCols().front()->toString() << std::endl;
|
||||||
|
|
||||||
// TODO char and other numerical types support
|
// TODO char and other numerical types support
|
||||||
|
|||||||
@@ -91,11 +91,6 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl
|
|||||||
execplan::CalpontSystemCatalog::TableAliasName& targetTable,
|
execplan::CalpontSystemCatalog::TableAliasName& targetTable,
|
||||||
optimizer::RBOptimizerContext& ctx);
|
optimizer::RBOptimizerContext& ctx);
|
||||||
|
|
||||||
std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyColumnAndStatistics(
|
|
||||||
execplan::CalpontSystemCatalog::TableAliasName& targetTable, optimizer::RBOptimizerContext& ctx);
|
|
||||||
|
|
||||||
Histogram_json_hb* chooseStatisticsToUse(const std::vector<Histogram_json_hb*>& statisticsVec);
|
|
||||||
|
|
||||||
} // namespace details
|
} // namespace details
|
||||||
|
|
||||||
// Main functions
|
// Main functions
|
||||||
|
|||||||
Reference in New Issue
Block a user