You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-11-21 09:20:51 +03:00
chore(QA,plugin): integer PK columns are supported by QA.
This commit is contained in:
@@ -333,6 +333,7 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, long t
|
||||
if (ti.tpl_scan_ctx->rowsreturned == 0 &&
|
||||
(ti.tpl_scan_ctx->traceFlags & execplan::CalpontSelectExecutionPlan::TRACE_TUPLE_OFF))
|
||||
{
|
||||
std::cout << "rowGroup->toString() " << rowGroup->toString() << std::endl;
|
||||
for (uint32_t i = 0; i < rowGroup->getColumnCount(); i++)
|
||||
{
|
||||
int oid = rowGroup->getOIDs()[i];
|
||||
|
||||
@@ -122,8 +122,9 @@ typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
|
||||
|
||||
struct ColumnStatistics
|
||||
{
|
||||
ColumnStatistics(execplan::SimpleColumn& column, std::vector<Histogram_json_hb*> histograms)
|
||||
: column(column), histograms(histograms)
|
||||
ColumnStatistics(execplan::SimpleColumn& column, std::vector<Histogram_json_hb*> histograms,
|
||||
Field* minValue, Field* maxValue)
|
||||
: column(column), histograms(histograms), minValue(minValue), maxValue(maxValue)
|
||||
{
|
||||
}
|
||||
ColumnStatistics() = default;
|
||||
@@ -133,15 +134,33 @@ struct ColumnStatistics
|
||||
return histograms;
|
||||
}
|
||||
|
||||
// Returns the first histogram stored for this column, or nullptr when no histogram is stored.
const Histogram_json_hb* getHistogram() const
{
  return histograms.empty() ? nullptr : histograms.front();
}
|
||||
|
||||
// Returns a mutable reference to the SimpleColumn stored in this statistics entry.
execplan::SimpleColumn& getColumn()
|
||||
{
|
||||
return column;
|
||||
}
|
||||
|
||||
std::optional<int64_t> getIntMinValue() const
|
||||
{
|
||||
return (minValue) ? std::optional<int64_t>(minValue->val_int()) : std::nullopt;
|
||||
}
|
||||
|
||||
std::optional<int64_t> getIntMaxValue() const
|
||||
{
|
||||
return (maxValue) ? std::optional<int64_t>(maxValue->val_int()) : std::nullopt;
|
||||
}
|
||||
|
||||
private:
|
||||
execplan::SimpleColumn column;
|
||||
std::vector<Histogram_json_hb*> histograms;
|
||||
Field* min{nullptr};
|
||||
Field* max{nullptr};
|
||||
Field* minValue{nullptr};
|
||||
Field* maxValue{nullptr};
|
||||
};
|
||||
|
||||
using ColumnName = std::string;
|
||||
@@ -162,14 +181,23 @@ struct TableStatistics
|
||||
auto tableStatisticsIt = tableStatistics_.find(tableName);
|
||||
if (tableStatisticsIt == tableStatistics_.end())
|
||||
{
|
||||
tableStatistics_[tableName][fieldName] = {sc, {histogram}};
|
||||
if (histogram)
|
||||
{
|
||||
tableStatistics_[tableName][fieldName] = {
|
||||
sc, {histogram}, statistics->min_value, statistics->max_value};
|
||||
}
|
||||
else
|
||||
{
|
||||
tableStatistics_[tableName][fieldName] = {sc, {}, statistics->min_value, statistics->max_value};
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto columnStatisticsMapIt = tableStatisticsIt->second.find(fieldName);
|
||||
if (columnStatisticsMapIt == tableStatisticsIt->second.end())
|
||||
{
|
||||
tableStatisticsIt->second[fieldName] = {sc, {histogram}};
|
||||
tableStatisticsIt->second[fieldName] = {
|
||||
sc, {histogram}, statistics->min_value, statistics->max_value};
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -202,6 +230,8 @@ struct TableStatistics
|
||||
}
|
||||
else
|
||||
{
|
||||
// Note: This algo overwrites histograms but shouldn't be a problem b/c
|
||||
// statistics can't change.
|
||||
for (auto& [columnName, histogram] : aColumnStatisticsMap)
|
||||
{
|
||||
tableStatisticsIt->second[columnName] = histogram;
|
||||
@@ -210,6 +240,7 @@ struct TableStatistics
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
TableStatisticsMap tableStatistics_;
|
||||
};
|
||||
|
||||
|
||||
@@ -216,15 +216,9 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// TBD
|
||||
Histogram_json_hb* chooseStatisticsToUse(std::vector<Histogram_json_hb*>& columnStatisticsVec)
|
||||
{
|
||||
return columnStatisticsVec.front();
|
||||
}
|
||||
|
||||
// Looking for a projected column that comes first in an available index and has EI statistics
|
||||
// INV nullptr signifies that no suitable column was found
|
||||
std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyColumnAndStatistics(
|
||||
cal_impl_if::ColumnStatistics* chooseKeyColumnAndStatistics(
|
||||
execplan::CalpontSystemCatalog::TableAliasName& targetTable, optimizer::RBOptimizerContext& ctx)
|
||||
{
|
||||
cal_impl_if::SchemaAndTableName schemaAndTableName = {targetTable.schema, targetTable.table};
|
||||
@@ -232,19 +226,17 @@ std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyC
|
||||
auto tableColumnsStatistics = ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName);
|
||||
if (!tableColumnsStatistics)
|
||||
{
|
||||
return std::nullopt;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// TODO take some column and some stats for it!!!
|
||||
// TODO this algo now returns the first column and stats
|
||||
// for it but it should consider all column available
|
||||
for (auto& [columnName, columnStatistics] : tableColumnsStatistics.value())
|
||||
{
|
||||
auto& sc = columnStatistics.getColumn();
|
||||
auto& columnStatisticsVec = columnStatistics.getHistograms();
|
||||
auto* bestColumnStatistics = chooseStatisticsToUse(columnStatisticsVec);
|
||||
return {{sc, bestColumnStatistics}};
|
||||
return &columnStatistics;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
return nullptr;
|
||||
}
|
||||
} // namespace details
|
||||
|
||||
@@ -257,37 +249,32 @@ bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RB
|
||||
return someAreForeignTables(csep) && someForeignTablesHasStatisticsAndMbIndex(csep, ctx);
|
||||
}
|
||||
|
||||
// Populates range bounds based on column statistics
|
||||
// Returns optional with bounds if successful, nullopt otherwise
|
||||
template <typename T>
|
||||
std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_hb* columnStatistics,
|
||||
size_t& maxParallelFactor)
|
||||
uint64_t decodeU64(const std::string& bytes)
|
||||
{
|
||||
details::FilterRangeBounds<T> bounds;
|
||||
|
||||
// Guard: empty histogram
|
||||
if (!columnStatistics || columnStatistics->get_json_histogram().empty())
|
||||
return std::nullopt;
|
||||
|
||||
auto decodeU64 = [](const std::string& bytes) -> uint64_t
|
||||
{
|
||||
uint64_t v = 0;
|
||||
const size_t n = std::min<size_t>(bytes.size(), sizeof(uint64_t));
|
||||
if (n)
|
||||
std::memcpy(&v, bytes.data(), n);
|
||||
return v;
|
||||
};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::optional<details::FilterRangeBounds<T>> populateRangeBoundsFromHistogram(
|
||||
cal_impl_if::ColumnStatistics& columnStatistics, size_t maxParallelFactor)
|
||||
{
|
||||
details::FilterRangeBounds<T> bounds;
|
||||
auto* histogram = columnStatistics.getHistogram();
|
||||
|
||||
// Get parallel factor from context
|
||||
size_t numberOfUnionUnits = std::min(columnStatistics->get_json_histogram().size(), maxParallelFactor);
|
||||
size_t numberOfBucketsPerUnionUnit = columnStatistics->get_json_histogram().size() / numberOfUnionUnits;
|
||||
size_t numberOfUnionUnits = std::min(histogram->get_json_histogram().size(), maxParallelFactor);
|
||||
size_t numberOfBucketsPerUnionUnit = histogram->get_json_histogram().size() / numberOfUnionUnits;
|
||||
|
||||
// Loop over buckets to produce filter ranges
|
||||
// NB Currently Histogram_json_hb has the last bucket that has end as its start
|
||||
for (size_t i = 0; i < numberOfUnionUnits - 1; ++i)
|
||||
{
|
||||
auto bucket = columnStatistics->get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit;
|
||||
auto endBucket = columnStatistics->get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit;
|
||||
auto bucket = histogram->get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit;
|
||||
auto endBucket = histogram->get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit;
|
||||
T currentLowerBound = static_cast<T>(decodeU64(bucket->start_value));
|
||||
T currentUpperBound = static_cast<T>(decodeU64(endBucket->start_value));
|
||||
bounds.push_back({currentLowerBound, currentUpperBound});
|
||||
@@ -297,15 +284,15 @@ std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_
|
||||
if (numberOfUnionUnits >= 1)
|
||||
{
|
||||
auto lastChunkIndex = (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit;
|
||||
if (lastChunkIndex < columnStatistics->get_json_histogram().size())
|
||||
if (lastChunkIndex < histogram->get_json_histogram().size())
|
||||
{
|
||||
auto lastStartBucket = columnStatistics->get_json_histogram().begin() + lastChunkIndex;
|
||||
auto lastStartBucket = histogram->get_json_histogram().begin() + lastChunkIndex;
|
||||
T finalLowerBound = static_cast<T>(decodeU64(lastStartBucket->start_value));
|
||||
|
||||
T finalUpperBound = std::numeric_limits<T>::max();
|
||||
if (!columnStatistics->get_last_bucket_end_endp().empty())
|
||||
if (!histogram->get_last_bucket_end_endp().empty())
|
||||
{
|
||||
finalUpperBound = static_cast<T>(decodeU64(columnStatistics->get_last_bucket_end_endp()));
|
||||
finalUpperBound = static_cast<T>(decodeU64(histogram->get_last_bucket_end_endp()));
|
||||
}
|
||||
bounds.push_back({finalLowerBound, finalUpperBound});
|
||||
}
|
||||
@@ -320,6 +307,56 @@ std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_
|
||||
return bounds;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::optional<details::FilterRangeBounds<T>> populateRangeBoundsFromEquallyDistributedRange(
|
||||
cal_impl_if::ColumnStatistics& columnStatistics, size_t maxParallelFactor)
|
||||
{
|
||||
auto minValue = columnStatistics.getIntMinValue().value();
|
||||
auto maxValue = columnStatistics.getIntMaxValue().value();
|
||||
|
||||
assert(maxValue >= minValue);
|
||||
auto distance = maxValue - minValue;
|
||||
auto step = distance / maxParallelFactor;
|
||||
|
||||
details::FilterRangeBounds<T> bounds;
|
||||
for (size_t i = 0; i < maxParallelFactor; ++i)
|
||||
{
|
||||
bounds.push_back({minValue + i * step, minValue + (i + 1) * step});
|
||||
}
|
||||
|
||||
if (!bounds.empty())
|
||||
{
|
||||
bounds.front().first = std::numeric_limits<T>::lowest();
|
||||
bounds.back().second = maxValue;
|
||||
}
|
||||
|
||||
return bounds;
|
||||
}
|
||||
|
||||
// Populates range bounds based on column statistics
|
||||
// Returns optional with bounds if successful, nullopt otherwise
|
||||
template <typename T>
|
||||
std::optional<details::FilterRangeBounds<T>> populateRangeBounds(
|
||||
cal_impl_if::ColumnStatistics& columnStatistics, size_t& maxParallelFactor)
|
||||
{
|
||||
auto* histogram = columnStatistics.getHistogram();
|
||||
|
||||
// Guard: empty histogram or no min/max values
|
||||
if (histogram && histogram->get_json_histogram().empty())
|
||||
{
|
||||
return populateRangeBoundsFromHistogram<T>(columnStatistics, maxParallelFactor);
|
||||
}
|
||||
|
||||
auto minValue = columnStatistics.getIntMinValue();
|
||||
auto maxValue = columnStatistics.getIntMaxValue();
|
||||
if (minValue && maxValue)
|
||||
{
|
||||
return populateRangeBoundsFromEquallyDistributedRange<T>(columnStatistics, maxParallelFactor);
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// TODO char and other numerical types support
|
||||
execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
||||
execplan::CalpontSelectExecutionPlan& csep, execplan::CalpontSystemCatalog::TableAliasName& table,
|
||||
@@ -329,15 +366,16 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
||||
|
||||
// SC type controls an integral type used to produce suitable filters. The continuation of this function
|
||||
// should become a template function based on SC type.
|
||||
auto keyColumnAndStatistics = chooseKeyColumnAndStatistics(table, ctx);
|
||||
if (!keyColumnAndStatistics)
|
||||
auto columnStatisticsPtr = chooseKeyColumnAndStatistics(table, ctx);
|
||||
if (!columnStatisticsPtr)
|
||||
{
|
||||
return unionVec;
|
||||
}
|
||||
|
||||
auto& [keyColumn, columnStatistics] = keyColumnAndStatistics.value();
|
||||
auto& columnStatistics = *columnStatisticsPtr;
|
||||
auto& keyColumn = columnStatistics.getColumn();
|
||||
|
||||
std::cout << "makeUnionFromTable keyColumn " << keyColumn.toString() << std::endl;
|
||||
// std::cout << "makeUnionFromTable keyColumn " << keyColumn.toString() << std::endl;
|
||||
std::cout << "makeUnionFromTable RC front " << csep.returnedCols().front()->toString() << std::endl;
|
||||
|
||||
// TODO char and other numerical types support
|
||||
|
||||
@@ -91,11 +91,6 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl
|
||||
execplan::CalpontSystemCatalog::TableAliasName& targetTable,
|
||||
optimizer::RBOptimizerContext& ctx);
|
||||
|
||||
std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyColumnAndStatistics(
|
||||
execplan::CalpontSystemCatalog::TableAliasName& targetTable, optimizer::RBOptimizerContext& ctx);
|
||||
|
||||
Histogram_json_hb* chooseStatisticsToUse(const std::vector<Histogram_json_hb*>& statisticsVec);
|
||||
|
||||
} // namespace details
|
||||
|
||||
// Main functions
|
||||
|
||||
Reference in New Issue
Block a user