You've already forked mariadb-columnstore-engine
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git (synced 2025-08-05 16:15:50 +03:00)
feat(optimizer,rules): use EI statistics for range filters
This commit is contained in:
@@ -6321,7 +6321,7 @@ void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi)
|
|||||||
if (histogram)
|
if (histogram)
|
||||||
{
|
{
|
||||||
std::cout << "Type of histogram object: " << typeid(*histogram).name() << std::endl;
|
std::cout << "Type of histogram object: " << typeid(*histogram).name() << std::endl;
|
||||||
std::vector<Histogram_bucket> histogramBuckets = histogram->get_histogram();
|
std::vector<Histogram_bucket> histogramBuckets = histogram->get_json_histogram();
|
||||||
std::cout << "gwi.columnStatisticsMap[ifp->field->field_name.str].size() " << histogramBuckets.size() << std::endl;
|
std::cout << "gwi.columnStatisticsMap[ifp->field->field_name.str].size() " << histogramBuckets.size() << std::endl;
|
||||||
gwi.columnStatisticsMap[ifp->field->field_name.str] = histogramBuckets;
|
gwi.columnStatisticsMap[ifp->field->field_name.str] = histogramBuckets;
|
||||||
}
|
}
|
||||||
|
@@ -202,10 +202,11 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
|||||||
execplan::CalpontSelectExecutionPlan::SelectList unionVec;
|
execplan::CalpontSelectExecutionPlan::SelectList unionVec;
|
||||||
// unionVec.reserve(numberOfLegs);
|
// unionVec.reserve(numberOfLegs);
|
||||||
execplan::SimpleColumn* keyColumn = findSuitableKeyColumn(csep);
|
execplan::SimpleColumn* keyColumn = findSuitableKeyColumn(csep);
|
||||||
std::cout << "looking for " << keyColumn->columnName() << " in ctx.gwi.columnStatisticsMap " << " with size " << ctx.gwi.columnStatisticsMap.size() << std::endl;
|
std::cout << "looking for " << keyColumn->columnName() << " in ctx.gwi.columnStatisticsMap "
|
||||||
|
<< " with size " << ctx.gwi.columnStatisticsMap.size() << std::endl;
|
||||||
for (auto& [k, v] : ctx.gwi.columnStatisticsMap)
|
for (auto& [k, v] : ctx.gwi.columnStatisticsMap)
|
||||||
{
|
{
|
||||||
std::cout << "key " << k << std::endl;
|
std::cout << "key " << k << " vector size " << v.size() << std::endl;
|
||||||
}
|
}
|
||||||
if (!keyColumn ||
|
if (!keyColumn ||
|
||||||
ctx.gwi.columnStatisticsMap.find(keyColumn->columnName()) == ctx.gwi.columnStatisticsMap.end())
|
ctx.gwi.columnStatisticsMap.find(keyColumn->columnName()) == ctx.gwi.columnStatisticsMap.end())
|
||||||
@@ -216,16 +217,33 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
|||||||
auto columnStatistics = ctx.gwi.columnStatisticsMap[keyColumn->columnName()];
|
auto columnStatistics = ctx.gwi.columnStatisticsMap[keyColumn->columnName()];
|
||||||
std::cout << "columnStatistics.size() " << columnStatistics.size() << std::endl;
|
std::cout << "columnStatistics.size() " << columnStatistics.size() << std::endl;
|
||||||
// TODO char and other numerical types support
|
// TODO char and other numerical types support
|
||||||
|
size_t numberOfUnionUnits = 2;
|
||||||
|
size_t numberOfBucketsPerUnionUnit = columnStatistics.size() / numberOfUnionUnits;
|
||||||
|
|
||||||
std::vector<std::pair<uint64_t, uint64_t>> bounds;
|
std::vector<std::pair<uint64_t, uint64_t>> bounds;
|
||||||
std::transform(columnStatistics.begin(), columnStatistics.end(), std::back_inserter(bounds),
|
|
||||||
[](const auto& bucket)
|
// TODO need to process tail if number of buckets is not divisible by number of union units
|
||||||
{
|
// TODO non-overlapping buckets if it is a problem at all
|
||||||
uint64_t lowerBound = std::stoul(bucket.start_value);
|
for (size_t i = 0; i < numberOfUnionUnits; ++i)
|
||||||
uint64_t upperBound = lowerBound + bucket.ndv;
|
{
|
||||||
return std::make_pair(lowerBound, upperBound);
|
auto bucket = columnStatistics.begin() + i * numberOfBucketsPerUnionUnit;
|
||||||
});
|
auto endBucket = columnStatistics.begin() + (i + 1) * numberOfBucketsPerUnionUnit;
|
||||||
// std::vector<std::pair<uint64_t, uint64_t>> bounds({{0, 3000961},
|
// TODO find a median b/w the current bucket start and the previous bucket end
|
||||||
// // {3000961, std::numeric_limits<uint64_t>::max()}});
|
uint64_t currentLowerBound =
|
||||||
|
(bounds.empty() ? *(uint32_t*)bucket->start_value.data()
|
||||||
|
: std::min((uint64_t)*(uint32_t*)bucket->start_value.data(), bounds.back().second));
|
||||||
|
uint64_t currentUpperBound = currentLowerBound;
|
||||||
|
for (; bucket != endBucket; ++bucket)
|
||||||
|
{
|
||||||
|
uint64_t bucketLowerBound = *(uint32_t*)bucket->start_value.data();
|
||||||
|
std::cout << "bucket.start_value " << bucketLowerBound << std::endl;
|
||||||
|
currentUpperBound = bucketLowerBound + bucket->ndv;
|
||||||
|
}
|
||||||
|
std::cout << "currentLowerBound " << currentLowerBound << " currentUpperBound " << currentUpperBound
|
||||||
|
<< std::endl;
|
||||||
|
bounds.push_back(std::make_pair(currentLowerBound, currentUpperBound));
|
||||||
|
}
|
||||||
|
|
||||||
for (auto& bound : bounds)
|
for (auto& bound : bounds)
|
||||||
{
|
{
|
||||||
auto clonedCSEP = csep.cloneWORecursiveSelects();
|
auto clonedCSEP = csep.cloneWORecursiveSelects();
|
||||||
|
Reference in New Issue
Block a user