1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-11-21 09:20:51 +03:00

chore(QA,plugin): integer PK columns are supported by QA.

This commit is contained in:
drrtuy
2025-10-21 13:24:51 +00:00
parent fb98e46bfc
commit 45ebe62bc8
4 changed files with 119 additions and 54 deletions

View File

@@ -333,6 +333,7 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, long t
if (ti.tpl_scan_ctx->rowsreturned == 0 &&
(ti.tpl_scan_ctx->traceFlags & execplan::CalpontSelectExecutionPlan::TRACE_TUPLE_OFF))
{
std::cout << "rowGroup->toString() " << rowGroup->toString() << std::endl;
for (uint32_t i = 0; i < rowGroup->getColumnCount(); i++)
{
int oid = rowGroup->getOIDs()[i];

View File

@@ -122,8 +122,9 @@ typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
struct ColumnStatistics
{
ColumnStatistics(execplan::SimpleColumn& column, std::vector<Histogram_json_hb*> histograms)
: column(column), histograms(histograms)
ColumnStatistics(execplan::SimpleColumn& column, std::vector<Histogram_json_hb*> histograms,
Field* minValue, Field* maxValue)
: column(column), histograms(histograms), minValue(minValue), maxValue(maxValue)
{
}
ColumnStatistics() = default;
@@ -133,15 +134,33 @@ struct ColumnStatistics
return histograms;
}
// Returns the first histogram stored for this column, or nullptr when the
// histogram list is empty.
const Histogram_json_hb* getHistogram() const
{
  return histograms.empty() ? nullptr : histograms.front();
}
// Gives mutable access to the column stored alongside these statistics.
execplan::SimpleColumn& getColumn()
{
  return this->column;
}
// Returns the minimum-value field read as an integer via val_int(), or
// std::nullopt when no minimum-value field is attached.
std::optional<int64_t> getIntMinValue() const
{
  if (!minValue)
  {
    return std::nullopt;
  }
  return std::optional<int64_t>(minValue->val_int());
}
// Returns the maximum-value field read as an integer via val_int(), or
// std::nullopt when no maximum-value field is attached.
std::optional<int64_t> getIntMaxValue() const
{
  if (!maxValue)
  {
    return std::nullopt;
  }
  return std::optional<int64_t>(maxValue->val_int());
}
private:
execplan::SimpleColumn column;
std::vector<Histogram_json_hb*> histograms;
Field* min{nullptr};
Field* max{nullptr};
Field* minValue{nullptr};
Field* maxValue{nullptr};
};
using ColumnName = std::string;
@@ -162,14 +181,23 @@ struct TableStatistics
auto tableStatisticsIt = tableStatistics_.find(tableName);
if (tableStatisticsIt == tableStatistics_.end())
{
tableStatistics_[tableName][fieldName] = {sc, {histogram}};
if (histogram)
{
tableStatistics_[tableName][fieldName] = {
sc, {histogram}, statistics->min_value, statistics->max_value};
}
else
{
tableStatistics_[tableName][fieldName] = {sc, {}, statistics->min_value, statistics->max_value};
}
}
else
{
auto columnStatisticsMapIt = tableStatisticsIt->second.find(fieldName);
if (columnStatisticsMapIt == tableStatisticsIt->second.end())
{
tableStatisticsIt->second[fieldName] = {sc, {histogram}};
tableStatisticsIt->second[fieldName] = {
sc, {histogram}, statistics->min_value, statistics->max_value};
}
else
{
@@ -202,6 +230,8 @@ struct TableStatistics
}
else
{
// Note: this algorithm overwrites existing histograms, but that shouldn't be a problem
// because statistics can't change.
for (auto& [columnName, histogram] : aColumnStatisticsMap)
{
tableStatisticsIt->second[columnName] = histogram;
@@ -210,6 +240,7 @@ struct TableStatistics
}
}
private:
TableStatisticsMap tableStatistics_;
};

View File

@@ -216,15 +216,9 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl
return nullptr;
}
// TBD
// Picks the histogram to use from the candidates collected for a column.
// For now this is simply the first candidate.
// Returns nullptr when there are no candidates: calling front() on an empty
// vector is undefined behavior.
Histogram_json_hb* chooseStatisticsToUse(std::vector<Histogram_json_hb*>& columnStatisticsVec)
{
  if (columnStatisticsVec.empty())
  {
    return nullptr;
  }
  return columnStatisticsVec.front();
}
// Looking for a projected column that comes first in an available index and has EI statistics
// INV nullptr signifies that no suitable column was found
std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyColumnAndStatistics(
cal_impl_if::ColumnStatistics* chooseKeyColumnAndStatistics(
execplan::CalpontSystemCatalog::TableAliasName& targetTable, optimizer::RBOptimizerContext& ctx)
{
cal_impl_if::SchemaAndTableName schemaAndTableName = {targetTable.schema, targetTable.table};
@@ -232,19 +226,17 @@ std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyC
auto tableColumnsStatistics = ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName);
if (!tableColumnsStatistics)
{
return std::nullopt;
return nullptr;
}
// TODO take some column and some stats for it!!!
// TODO: this algorithm currently returns the first column and its statistics,
// but it should consider all available columns.
for (auto& [columnName, columnStatistics] : tableColumnsStatistics.value())
{
auto& sc = columnStatistics.getColumn();
auto& columnStatisticsVec = columnStatistics.getHistograms();
auto* bestColumnStatistics = chooseStatisticsToUse(columnStatisticsVec);
return {{sc, bestColumnStatistics}};
return &columnStatistics;
}
return std::nullopt;
return nullptr;
}
} // namespace details
@@ -257,37 +249,32 @@ bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RB
return someAreForeignTables(csep) && someForeignTablesHasStatisticsAndMbIndex(csep, ctx);
}
// Populates range bounds based on column statistics
// Returns optional with bounds if successful, nullopt otherwise
// Reinterprets the leading bytes of `bytes` as a uint64_t by raw byte copy.
// At most sizeof(uint64_t) bytes are consumed; shorter inputs leave the
// remaining bytes of the result zeroed, and an empty input yields 0.
uint64_t decodeU64(const std::string& bytes)
{
  uint64_t decoded = 0;
  const size_t available = bytes.size();
  const size_t copyLen = (available < sizeof(uint64_t)) ? available : sizeof(uint64_t);
  if (copyLen == 0)
  {
    return decoded;
  }
  std::memcpy(&decoded, bytes.data(), copyLen);
  return decoded;
}
template <typename T>
std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_hb* columnStatistics,
size_t& maxParallelFactor)
std::optional<details::FilterRangeBounds<T>> populateRangeBoundsFromHistogram(
cal_impl_if::ColumnStatistics& columnStatistics, size_t maxParallelFactor)
{
details::FilterRangeBounds<T> bounds;
// Guard: empty histogram
if (!columnStatistics || columnStatistics->get_json_histogram().empty())
return std::nullopt;
auto decodeU64 = [](const std::string& bytes) -> uint64_t
{
uint64_t v = 0;
const size_t n = std::min<size_t>(bytes.size(), sizeof(uint64_t));
if (n)
std::memcpy(&v, bytes.data(), n);
return v;
};
auto* histogram = columnStatistics.getHistogram();
// Get parallel factor from context
size_t numberOfUnionUnits = std::min(columnStatistics->get_json_histogram().size(), maxParallelFactor);
size_t numberOfBucketsPerUnionUnit = columnStatistics->get_json_histogram().size() / numberOfUnionUnits;
size_t numberOfUnionUnits = std::min(histogram->get_json_histogram().size(), maxParallelFactor);
size_t numberOfBucketsPerUnionUnit = histogram->get_json_histogram().size() / numberOfUnionUnits;
// Loop over buckets to produce filter ranges
// NB Currently Histogram_json_hb has the last bucket that has end as its start
for (size_t i = 0; i < numberOfUnionUnits - 1; ++i)
{
auto bucket = columnStatistics->get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit;
auto endBucket = columnStatistics->get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit;
auto bucket = histogram->get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit;
auto endBucket = histogram->get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit;
T currentLowerBound = static_cast<T>(decodeU64(bucket->start_value));
T currentUpperBound = static_cast<T>(decodeU64(endBucket->start_value));
bounds.push_back({currentLowerBound, currentUpperBound});
@@ -297,15 +284,15 @@ std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_
if (numberOfUnionUnits >= 1)
{
auto lastChunkIndex = (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit;
if (lastChunkIndex < columnStatistics->get_json_histogram().size())
if (lastChunkIndex < histogram->get_json_histogram().size())
{
auto lastStartBucket = columnStatistics->get_json_histogram().begin() + lastChunkIndex;
auto lastStartBucket = histogram->get_json_histogram().begin() + lastChunkIndex;
T finalLowerBound = static_cast<T>(decodeU64(lastStartBucket->start_value));
T finalUpperBound = std::numeric_limits<T>::max();
if (!columnStatistics->get_last_bucket_end_endp().empty())
if (!histogram->get_last_bucket_end_endp().empty())
{
finalUpperBound = static_cast<T>(decodeU64(columnStatistics->get_last_bucket_end_endp()));
finalUpperBound = static_cast<T>(decodeU64(histogram->get_last_bucket_end_endp()));
}
bounds.push_back({finalLowerBound, finalUpperBound});
}
@@ -320,6 +307,56 @@ std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_
return bounds;
}
// Splits the integer [min, max] range taken from the column statistics into
// maxParallelFactor consecutive sub-ranges of equal width.
//
// The lower bound of the first range is widened to std::numeric_limits<T>::lowest()
// and the upper bound of the last range is pinned to the statistics maximum, so the
// remainder lost to integer division is absorbed by the last range.
//
// Returns std::nullopt when either statistics value is missing or when
// maxParallelFactor is zero (which would otherwise be a division by zero).
template <typename T>
std::optional<details::FilterRangeBounds<T>> populateRangeBoundsFromEquallyDistributedRange(
    cal_impl_if::ColumnStatistics& columnStatistics, size_t maxParallelFactor)
{
  const auto minValueOpt = columnStatistics.getIntMinValue();
  const auto maxValueOpt = columnStatistics.getIntMaxValue();
  if (!minValueOpt || !maxValueOpt || maxParallelFactor == 0)
  {
    return std::nullopt;
  }

  const int64_t minValue = *minValueOpt;
  const int64_t maxValue = *maxValueOpt;
  assert(maxValue >= minValue);

  // Do the arithmetic in uint64_t: max - min may not fit into int64_t
  // (e.g. INT64_MIN..INT64_MAX) and signed overflow is undefined behavior.
  const uint64_t base = static_cast<uint64_t>(minValue);
  const uint64_t distance = static_cast<uint64_t>(maxValue) - base;
  const uint64_t step = distance / maxParallelFactor;

  details::FilterRangeBounds<T> bounds;
  for (size_t i = 0; i < maxParallelFactor; ++i)
  {
    bounds.push_back({static_cast<T>(base + i * step), static_cast<T>(base + (i + 1) * step)});
  }

  // maxParallelFactor >= 1 here, so bounds holds at least one range.
  bounds.front().first = std::numeric_limits<T>::lowest();
  bounds.back().second = static_cast<T>(maxValue);
  return bounds;
}
// Populates range bounds based on column statistics.
// A histogram is used when one is present and has buckets; otherwise the bounds are
// derived from an equal-width split of the [min, max] range, provided both values
// are known.
// Returns optional with bounds if successful, nullopt otherwise.
template <typename T>
std::optional<details::FilterRangeBounds<T>> populateRangeBounds(
    cal_impl_if::ColumnStatistics& columnStatistics, size_t& maxParallelFactor)
{
  auto* histogram = columnStatistics.getHistogram();
  // Only a histogram that actually has buckets is usable: the histogram path divides
  // by the number of union units, which is zero for an empty histogram.
  if (histogram && !histogram->get_json_histogram().empty())
  {
    return populateRangeBoundsFromHistogram<T>(columnStatistics, maxParallelFactor);
  }

  // No usable histogram: fall back to min/max when both are available.
  auto minValue = columnStatistics.getIntMinValue();
  auto maxValue = columnStatistics.getIntMaxValue();
  if (minValue && maxValue)
  {
    return populateRangeBoundsFromEquallyDistributedRange<T>(columnStatistics, maxParallelFactor);
  }

  return std::nullopt;
}
// TODO char and other numerical types support
execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
execplan::CalpontSelectExecutionPlan& csep, execplan::CalpontSystemCatalog::TableAliasName& table,
@@ -329,15 +366,16 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
// SC type controls an integral type used to produce suitable filters. The continuation of this function
// should become a template function based on SC type.
auto keyColumnAndStatistics = chooseKeyColumnAndStatistics(table, ctx);
if (!keyColumnAndStatistics)
auto columnStatisticsPtr = chooseKeyColumnAndStatistics(table, ctx);
if (!columnStatisticsPtr)
{
return unionVec;
}
auto& [keyColumn, columnStatistics] = keyColumnAndStatistics.value();
auto& columnStatistics = *columnStatisticsPtr;
auto& keyColumn = columnStatistics.getColumn();
std::cout << "makeUnionFromTable keyColumn " << keyColumn.toString() << std::endl;
// std::cout << "makeUnionFromTable keyColumn " << keyColumn.toString() << std::endl;
std::cout << "makeUnionFromTable RC front " << csep.returnedCols().front()->toString() << std::endl;
// TODO char and other numerical types support

View File

@@ -91,11 +91,6 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl
execplan::CalpontSystemCatalog::TableAliasName& targetTable,
optimizer::RBOptimizerContext& ctx);
std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyColumnAndStatistics(
execplan::CalpontSystemCatalog::TableAliasName& targetTable, optimizer::RBOptimizerContext& ctx);
Histogram_json_hb* chooseStatisticsToUse(const std::vector<Histogram_json_hb*>& statisticsVec);
} // namespace details
// Main functions