1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-11-21 09:20:51 +03:00

chore(QA,plugin): integer PK columns are supported by QA.

This commit is contained in:
drrtuy
2025-10-21 13:24:51 +00:00
parent fb98e46bfc
commit 45ebe62bc8
4 changed files with 119 additions and 54 deletions

View File

@@ -333,6 +333,7 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, long t
if (ti.tpl_scan_ctx->rowsreturned == 0 && if (ti.tpl_scan_ctx->rowsreturned == 0 &&
(ti.tpl_scan_ctx->traceFlags & execplan::CalpontSelectExecutionPlan::TRACE_TUPLE_OFF)) (ti.tpl_scan_ctx->traceFlags & execplan::CalpontSelectExecutionPlan::TRACE_TUPLE_OFF))
{ {
std::cout << "rowGroup->toString() " << rowGroup->toString() << std::endl;
for (uint32_t i = 0; i < rowGroup->getColumnCount(); i++) for (uint32_t i = 0; i < rowGroup->getColumnCount(); i++)
{ {
int oid = rowGroup->getOIDs()[i]; int oid = rowGroup->getOIDs()[i];

View File

@@ -122,8 +122,9 @@ typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
struct ColumnStatistics struct ColumnStatistics
{ {
ColumnStatistics(execplan::SimpleColumn& column, std::vector<Histogram_json_hb*> histograms) ColumnStatistics(execplan::SimpleColumn& column, std::vector<Histogram_json_hb*> histograms,
: column(column), histograms(histograms) Field* minValue, Field* maxValue)
: column(column), histograms(histograms), minValue(minValue), maxValue(maxValue)
{ {
} }
ColumnStatistics() = default; ColumnStatistics() = default;
@@ -133,15 +134,33 @@ struct ColumnStatistics
return histograms; return histograms;
} }
// Returns the first histogram stored for this column, or nullptr when the
// histogram list is empty.
const Histogram_json_hb* getHistogram() const
{
  return histograms.empty() ? nullptr : histograms.front();
}
execplan::SimpleColumn& getColumn() execplan::SimpleColumn& getColumn()
{ {
return column; return column;
} }
// Returns the minimum value read through val_int() of the attached min Field,
// or std::nullopt when no min Field is attached.
std::optional<int64_t> getIntMinValue() const
{
  if (!minValue)
  {
    return std::nullopt;
  }
  return std::optional<int64_t>(minValue->val_int());
}
// Returns the maximum value read through val_int() of the attached max Field,
// or std::nullopt when no max Field is attached.
std::optional<int64_t> getIntMaxValue() const
{
  if (!maxValue)
  {
    return std::nullopt;
  }
  return std::optional<int64_t>(maxValue->val_int());
}
private:
execplan::SimpleColumn column; execplan::SimpleColumn column;
std::vector<Histogram_json_hb*> histograms; std::vector<Histogram_json_hb*> histograms;
Field* min{nullptr}; Field* minValue{nullptr};
Field* max{nullptr}; Field* maxValue{nullptr};
}; };
using ColumnName = std::string; using ColumnName = std::string;
@@ -162,14 +181,23 @@ struct TableStatistics
auto tableStatisticsIt = tableStatistics_.find(tableName); auto tableStatisticsIt = tableStatistics_.find(tableName);
if (tableStatisticsIt == tableStatistics_.end()) if (tableStatisticsIt == tableStatistics_.end())
{ {
tableStatistics_[tableName][fieldName] = {sc, {histogram}}; if (histogram)
{
tableStatistics_[tableName][fieldName] = {
sc, {histogram}, statistics->min_value, statistics->max_value};
}
else
{
tableStatistics_[tableName][fieldName] = {sc, {}, statistics->min_value, statistics->max_value};
}
} }
else else
{ {
auto columnStatisticsMapIt = tableStatisticsIt->second.find(fieldName); auto columnStatisticsMapIt = tableStatisticsIt->second.find(fieldName);
if (columnStatisticsMapIt == tableStatisticsIt->second.end()) if (columnStatisticsMapIt == tableStatisticsIt->second.end())
{ {
tableStatisticsIt->second[fieldName] = {sc, {histogram}}; tableStatisticsIt->second[fieldName] = {
sc, {histogram}, statistics->min_value, statistics->max_value};
} }
else else
{ {
@@ -202,6 +230,8 @@ struct TableStatistics
} }
else else
{ {
// Note: this algorithm overwrites existing histograms, but that shouldn't be a
// problem because statistics can't change.
for (auto& [columnName, histogram] : aColumnStatisticsMap) for (auto& [columnName, histogram] : aColumnStatisticsMap)
{ {
tableStatisticsIt->second[columnName] = histogram; tableStatisticsIt->second[columnName] = histogram;
@@ -210,6 +240,7 @@ struct TableStatistics
} }
} }
private:
TableStatisticsMap tableStatistics_; TableStatisticsMap tableStatistics_;
}; };

View File

@@ -216,15 +216,9 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl
return nullptr; return nullptr;
} }
// TBD
Histogram_json_hb* chooseStatisticsToUse(std::vector<Histogram_json_hb*>& columnStatisticsVec)
{
return columnStatisticsVec.front();
}
// Looking for a projected column that comes first in an available index and has EI statistics // Looking for a projected column that comes first in an available index and has EI statistics
// INV nullptr signifies that no suitable column was found // INV nullptr signifies that no suitable column was found
std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyColumnAndStatistics( cal_impl_if::ColumnStatistics* chooseKeyColumnAndStatistics(
execplan::CalpontSystemCatalog::TableAliasName& targetTable, optimizer::RBOptimizerContext& ctx) execplan::CalpontSystemCatalog::TableAliasName& targetTable, optimizer::RBOptimizerContext& ctx)
{ {
cal_impl_if::SchemaAndTableName schemaAndTableName = {targetTable.schema, targetTable.table}; cal_impl_if::SchemaAndTableName schemaAndTableName = {targetTable.schema, targetTable.table};
@@ -232,19 +226,17 @@ std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyC
auto tableColumnsStatistics = ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName); auto tableColumnsStatistics = ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName);
if (!tableColumnsStatistics) if (!tableColumnsStatistics)
{ {
return std::nullopt; return nullptr;
} }
// TODO take some column and some stats for it!!! // TODO this algo now returns the first column and stats
// for it but it should consider all column available
for (auto& [columnName, columnStatistics] : tableColumnsStatistics.value()) for (auto& [columnName, columnStatistics] : tableColumnsStatistics.value())
{ {
auto& sc = columnStatistics.getColumn(); return &columnStatistics;
auto& columnStatisticsVec = columnStatistics.getHistograms();
auto* bestColumnStatistics = chooseStatisticsToUse(columnStatisticsVec);
return {{sc, bestColumnStatistics}};
} }
return std::nullopt; return nullptr;
} }
} // namespace details } // namespace details
@@ -257,37 +249,32 @@ bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RB
return someAreForeignTables(csep) && someForeignTablesHasStatisticsAndMbIndex(csep, ctx); return someAreForeignTables(csep) && someForeignTablesHasStatisticsAndMbIndex(csep, ctx);
} }
// Populates range bounds based on column statistics uint64_t decodeU64(const std::string& bytes)
// Returns optional with bounds if successful, nullopt otherwise
template <typename T>
std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_hb* columnStatistics,
size_t& maxParallelFactor)
{ {
details::FilterRangeBounds<T> bounds;
// Guard: empty histogram
if (!columnStatistics || columnStatistics->get_json_histogram().empty())
return std::nullopt;
auto decodeU64 = [](const std::string& bytes) -> uint64_t
{
uint64_t v = 0; uint64_t v = 0;
const size_t n = std::min<size_t>(bytes.size(), sizeof(uint64_t)); const size_t n = std::min<size_t>(bytes.size(), sizeof(uint64_t));
if (n) if (n)
std::memcpy(&v, bytes.data(), n); std::memcpy(&v, bytes.data(), n);
return v; return v;
}; }
template <typename T>
std::optional<details::FilterRangeBounds<T>> populateRangeBoundsFromHistogram(
cal_impl_if::ColumnStatistics& columnStatistics, size_t maxParallelFactor)
{
details::FilterRangeBounds<T> bounds;
auto* histogram = columnStatistics.getHistogram();
// Get parallel factor from context // Get parallel factor from context
size_t numberOfUnionUnits = std::min(columnStatistics->get_json_histogram().size(), maxParallelFactor); size_t numberOfUnionUnits = std::min(histogram->get_json_histogram().size(), maxParallelFactor);
size_t numberOfBucketsPerUnionUnit = columnStatistics->get_json_histogram().size() / numberOfUnionUnits; size_t numberOfBucketsPerUnionUnit = histogram->get_json_histogram().size() / numberOfUnionUnits;
// Loop over buckets to produce filter ranges // Loop over buckets to produce filter ranges
// NB Currently Histogram_json_hb has the last bucket that has end as its start // NB Currently Histogram_json_hb has the last bucket that has end as its start
for (size_t i = 0; i < numberOfUnionUnits - 1; ++i) for (size_t i = 0; i < numberOfUnionUnits - 1; ++i)
{ {
auto bucket = columnStatistics->get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit; auto bucket = histogram->get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit;
auto endBucket = columnStatistics->get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit; auto endBucket = histogram->get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit;
T currentLowerBound = static_cast<T>(decodeU64(bucket->start_value)); T currentLowerBound = static_cast<T>(decodeU64(bucket->start_value));
T currentUpperBound = static_cast<T>(decodeU64(endBucket->start_value)); T currentUpperBound = static_cast<T>(decodeU64(endBucket->start_value));
bounds.push_back({currentLowerBound, currentUpperBound}); bounds.push_back({currentLowerBound, currentUpperBound});
@@ -297,15 +284,15 @@ std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_
if (numberOfUnionUnits >= 1) if (numberOfUnionUnits >= 1)
{ {
auto lastChunkIndex = (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit; auto lastChunkIndex = (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit;
if (lastChunkIndex < columnStatistics->get_json_histogram().size()) if (lastChunkIndex < histogram->get_json_histogram().size())
{ {
auto lastStartBucket = columnStatistics->get_json_histogram().begin() + lastChunkIndex; auto lastStartBucket = histogram->get_json_histogram().begin() + lastChunkIndex;
T finalLowerBound = static_cast<T>(decodeU64(lastStartBucket->start_value)); T finalLowerBound = static_cast<T>(decodeU64(lastStartBucket->start_value));
T finalUpperBound = std::numeric_limits<T>::max(); T finalUpperBound = std::numeric_limits<T>::max();
if (!columnStatistics->get_last_bucket_end_endp().empty()) if (!histogram->get_last_bucket_end_endp().empty())
{ {
finalUpperBound = static_cast<T>(decodeU64(columnStatistics->get_last_bucket_end_endp())); finalUpperBound = static_cast<T>(decodeU64(histogram->get_last_bucket_end_endp()));
} }
bounds.push_back({finalLowerBound, finalUpperBound}); bounds.push_back({finalLowerBound, finalUpperBound});
} }
@@ -320,6 +307,56 @@ std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_
return bounds; return bounds;
} }
// Splits the [min, max] interval taken from the column's integer min/max
// statistics into maxParallelFactor consecutive ranges of equal width. The
// width is computed with integer division; the last range absorbs the
// remainder because its upper bound is pinned to the max statistic. The first
// range's lower bound is widened to std::numeric_limits<T>::lowest().
//
// Returns std::nullopt when either statistic is missing, when max < min, or
// when maxParallelFactor is 0 (which would otherwise divide by zero).
template <typename T>
std::optional<details::FilterRangeBounds<T>> populateRangeBoundsFromEquallyDistributedRange(
    cal_impl_if::ColumnStatistics& columnStatistics, size_t maxParallelFactor)
{
  const auto minValueOpt = columnStatistics.getIntMinValue();
  const auto maxValueOpt = columnStatistics.getIntMaxValue();
  if (!minValueOpt || !maxValueOpt || maxParallelFactor == 0)
  {
    return std::nullopt;
  }

  const int64_t minValue = *minValueOpt;
  const int64_t maxValue = *maxValueOpt;
  // Runtime check instead of assert(): assert is compiled out in release
  // builds, and inverted statistics would yield a huge wrapped distance.
  if (maxValue < minValue)
  {
    return std::nullopt;
  }

  // Do the arithmetic in uint64_t on purpose: max - min can exceed INT64_MAX
  // (signed overflow is UB), while unsigned wrap-around is well defined and
  // converts back to the intended value.
  const uint64_t base = static_cast<uint64_t>(minValue);
  const uint64_t distance = static_cast<uint64_t>(maxValue) - base;
  const uint64_t step = distance / maxParallelFactor;

  details::FilterRangeBounds<T> bounds;
  for (size_t i = 0; i < maxParallelFactor; ++i)
  {
    bounds.push_back({static_cast<T>(base + i * step), static_cast<T>(base + (i + 1) * step)});
  }

  // maxParallelFactor >= 1 here, so bounds holds at least one range.
  bounds.front().first = std::numeric_limits<T>::lowest();
  bounds.back().second = static_cast<T>(maxValue);
  return bounds;
}
// Populates range bounds based on column statistics.
// Prefers the column histogram when one exists and has buckets; otherwise
// falls back to an equal-width split of the integer [min, max] statistics.
// Returns optional with bounds if successful, nullopt when neither a
// non-empty histogram nor both min/max values are available.
template <typename T>
std::optional<details::FilterRangeBounds<T>> populateRangeBounds(
    cal_impl_if::ColumnStatistics& columnStatistics, size_t& maxParallelFactor)
{
  const auto* histogram = columnStatistics.getHistogram();
  // Only take the histogram path when there are buckets to split: an empty
  // histogram would make the number of union units zero and divide by it.
  if (histogram && !histogram->get_json_histogram().empty())
  {
    return populateRangeBoundsFromHistogram<T>(columnStatistics, maxParallelFactor);
  }

  // No usable histogram: fall back to min/max, which must both be present.
  const auto minValue = columnStatistics.getIntMinValue();
  const auto maxValue = columnStatistics.getIntMaxValue();
  if (minValue && maxValue)
  {
    return populateRangeBoundsFromEquallyDistributedRange<T>(columnStatistics, maxParallelFactor);
  }

  return std::nullopt;
}
// TODO char and other numerical types support // TODO char and other numerical types support
execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
execplan::CalpontSelectExecutionPlan& csep, execplan::CalpontSystemCatalog::TableAliasName& table, execplan::CalpontSelectExecutionPlan& csep, execplan::CalpontSystemCatalog::TableAliasName& table,
@@ -329,15 +366,16 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
// SC type controls an integral type used to produce suitable filters. The continuation of this function // SC type controls an integral type used to produce suitable filters. The continuation of this function
// should become a template function based on SC type. // should become a template function based on SC type.
auto keyColumnAndStatistics = chooseKeyColumnAndStatistics(table, ctx); auto columnStatisticsPtr = chooseKeyColumnAndStatistics(table, ctx);
if (!keyColumnAndStatistics) if (!columnStatisticsPtr)
{ {
return unionVec; return unionVec;
} }
auto& [keyColumn, columnStatistics] = keyColumnAndStatistics.value(); auto& columnStatistics = *columnStatisticsPtr;
auto& keyColumn = columnStatistics.getColumn();
std::cout << "makeUnionFromTable keyColumn " << keyColumn.toString() << std::endl; // std::cout << "makeUnionFromTable keyColumn " << keyColumn.toString() << std::endl;
std::cout << "makeUnionFromTable RC front " << csep.returnedCols().front()->toString() << std::endl; std::cout << "makeUnionFromTable RC front " << csep.returnedCols().front()->toString() << std::endl;
// TODO char and other numerical types support // TODO char and other numerical types support

View File

@@ -91,11 +91,6 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl
execplan::CalpontSystemCatalog::TableAliasName& targetTable, execplan::CalpontSystemCatalog::TableAliasName& targetTable,
optimizer::RBOptimizerContext& ctx); optimizer::RBOptimizerContext& ctx);
std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyColumnAndStatistics(
execplan::CalpontSystemCatalog::TableAliasName& targetTable, optimizer::RBOptimizerContext& ctx);
Histogram_json_hb* chooseStatisticsToUse(const std::vector<Histogram_json_hb*>& statisticsVec);
} // namespace details } // namespace details
// Main functions // Main functions