1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-10-31 18:30:33 +03:00

chore(rbo): MCOL-6143: Settle down rbo as a separate lib for Unittesting (#3708)

* fix builds

* MCOL-6143: rbo as a separate library

* Move get_unstable_optimizer out of rbo, move findStatisticsForATable to the header (dirty fix, but cuts the corner)

* Add some helpers to a headerfile for unittesting

* Simple unittests with some mocks
This commit is contained in:
Leonid Fedorov
2025-08-15 15:28:28 +04:00
committed by GitHub
parent 08d89fcef7
commit 14fe4401bc
15 changed files with 671 additions and 83 deletions

View File

@@ -62,18 +62,13 @@ columnstore_install_file(${CMAKE_CURRENT_BINARY_DIR}/gitversionEngine ${ENGINE_S
set(COMPONENTS
utils
oam/oamcpp
dbcon/execplan
dbcon/joblist
dbcon
versioning
oam
writeengine/wrapper
writeengine/client
writeengine/xml
writeengine/redistribute
dbcon/ddlpackage
dbcon/ddlpackageproc
dbcon/dmlpackage
dbcon/dmlpackageproc
ddlproc
dmlproc
oamapps

View File

@@ -572,7 +572,7 @@ build_binary() {
message "Configuring cmake silently"
${CMAKE_BIN_NAME} "${MDB_CMAKE_FLAGS[@]}" -S"$MDB_SOURCE_PATH" -B"$MARIA_BUILD_PATH" | spinner
message_split
# check_debian_install_file // will be uncommented later
check_debian_install_file
generate_svgs
${CMAKE_BIN_NAME} --build "$MARIA_BUILD_PATH" -j "$CPUS" | onelinearizator

View File

@@ -1,7 +1,8 @@
# order is important dbcon/mysql is added in the main CMakeLists.txt
add_subdirectory(execplan)
add_subdirectory(joblist)
add_subdirectory(rbo)
add_subdirectory(ddlpackage)
add_subdirectory(ddlpackageproc)
add_subdirectory(dmlpackage)
add_subdirectory(dmlpackageproc)
add_subdirectory(execplan)
add_subdirectory(joblist)
add_subdirectory(mysql)

View File

@@ -42,9 +42,6 @@ set(libcalmysql_SRCS
is_columnstore_files.cpp
is_columnstore_extents.cpp
columnstore_dataload.cpp
rulebased_optimizer.cpp
rbo_apply_parallel_ces.cpp
rbo_predicate_pushdown.cpp
)
set_source_files_properties(ha_mcs.cpp PROPERTIES COMPILE_FLAGS "-fno-implicit-templates")
@@ -61,6 +58,7 @@ if(COMMAND mysql_add_plugin)
MODULE_ONLY
${disabled}
LINK_LIBRARIES
rbo
${ENGINE_LDFLAGS}
${PLUGIN_EXEC_LIBS}
${PLUGIN_WRITE_LIBS}
@@ -88,6 +86,7 @@ else()
columnstore_link(
ha_columnstore
rbo
${S3API_DEPS}
${ENGINE_LDFLAGS}
${ENGINE_WRITE_LIBS}

View File

@@ -5222,7 +5222,7 @@ void extractColumnStatistics(TABLE_LIST* table_ptr, gp_walk_info& gwi)
auto* histogram = dynamic_cast<Histogram_json_hb*>(field->read_stats->histogram);
if (histogram)
{
std::cout << " has stats with " << histogram->buckets.size() << " buckets";
std::cout << " has stats with " << histogram->get_json_histogram().size() << " buckets";
SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str};
auto sc =
std::unique_ptr<execplan::SimpleColumn>(buildSimpleColumnFromFieldForStatistics(field, gwi));
@@ -7596,12 +7596,11 @@ int cs_get_select_plan(ha_columnstore_select_handler* handler, THD* thd, SCSEP&
// Derived table projection list optimization.
derivedTableOptimization(&gwi, csep);
{
optimizer::RBOptimizerContext ctx(gwi, *thd, csep->traceOn());
// TODO RBO can crash or fail leaving CSEP in an invalid state, so there must be a valid CSEP copy
// TBD There is a tradeoff b/w copy per rule and copy per optimizer run.
bool csepWasOptimized = optimizer::optimizeCSEP(*csep, ctx);
bool csepWasOptimized = optimizer::optimizeCSEP(*csep, ctx, get_unstable_optimizer(&ctx.thd));
if (csep->traceOn() && csepWasOptimized)
{
cerr << "---------------- cs_get_select_plan optimized EXECUTION PLAN ----------------" << endl;

View File

@@ -156,24 +156,6 @@ void gp_walk_info::mergeTableStatistics(const TableStatisticsMap& aTableStatisti
}
}
std::optional<ColumnStatisticsMap> gp_walk_info::findStatisticsForATable(
SchemaAndTableName& schemaAndTableName)
{
auto tableStatisticsMapIt = tableStatisticsMap.find(schemaAndTableName);
for (auto& [schemaAndTableName, columnStatisticsMap] : tableStatisticsMap)
{
std::cout << "Table " << schemaAndTableName.schema << "." << schemaAndTableName.table
<< " has statistics " << columnStatisticsMap.size() << std::endl;
}
if (tableStatisticsMapIt == tableStatisticsMap.end())
{
return std::nullopt;
}
return {tableStatisticsMapIt->second};
}
} // namespace cal_impl_if
namespace

View File

@@ -95,16 +95,20 @@ enum ClauseType
ORDER_BY
};
struct SchemaAndTableName {
struct SchemaAndTableName
{
std::string schema;
std::string table;
bool operator==(const SchemaAndTableName& other) const {
bool operator==(const SchemaAndTableName& other) const
{
return schema == other.schema && table == other.table;
}
};
struct SchemaAndTableNameHash {
std::size_t operator()(const SchemaAndTableName& k) const {
struct SchemaAndTableNameHash
{
std::size_t operator()(const SchemaAndTableName& k) const
{
return std::hash<std::string>()(k.schema + k.table);
}
};
@@ -116,8 +120,10 @@ typedef std::map<execplan::CalpontSystemCatalog::TableAliasName, std::pair<int,
typedef std::tr1::unordered_map<TABLE_LIST*, std::vector<COND*>> TableOnExprList;
typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
using ColumnName = std::string;
using ColumnStatisticsMap = std::unordered_map<ColumnName, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>;
using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
using ColumnStatisticsMap =
std::unordered_map<ColumnName, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>;
using TableStatisticsMap =
std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
// This structure is used to store MDB AST -> CSEP translation context.
// There is a column statistics for some columns in a query.
@@ -257,7 +263,22 @@ struct gp_walk_info
~gp_walk_info();
void mergeTableStatistics(const TableStatisticsMap& tableStatisticsMap);
std::optional<ColumnStatisticsMap> findStatisticsForATable(SchemaAndTableName& schemaAndTableName);
// Returns a copy of the column statistics stored for `schemaAndTableName`,
// or std::nullopt when tableStatisticsMap has no entry for that table.
// Every call also prints one line per known table (name and number of
// column statistics) to stdout.
std::optional<ColumnStatisticsMap> findStatisticsForATable(SchemaAndTableName& schemaAndTableName)
{
  // Diagnostic dump of everything currently in the map.
  for (auto& [knownTable, knownColumnStatistics] : tableStatisticsMap)
  {
    std::cout << "Table " << knownTable.schema << "." << knownTable.table << " has statistics "
              << knownColumnStatistics.size() << std::endl;
  }

  auto found = tableStatisticsMap.find(schemaAndTableName);
  if (found != tableStatisticsMap.end())
  {
    return {found->second};
  }
  return std::nullopt;
}
};
struct SubQueryChainHolder;

View File

@@ -43,6 +43,9 @@ void applyParallelCES_exists(execplan::CalpontSelectExecutionPlan& csep, const s
static const std::string RewrittenSubTableAliasPrefix = "$added_sub_";
static const size_t MaxParallelFactor = 16;
namespace details
{
bool tableIsInUnion(const execplan::CalpontSystemCatalog::TableAliasName& table,
execplan::CalpontSelectExecutionPlan& csep)
{
@@ -77,13 +80,6 @@ bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPl
});
}
bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx)
{
// TODO filter out CSEPs with orderBy, groupBy, having || or clean up OB,GB,HAVING cloning CSEP
// Filter out tables that were re-written.
return someAreForeignTables(csep) && someForeignTablesHasStatisticsAndMbIndex(csep, ctx);
}
// This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand)
// TODO add engine-independent statistics-derived ranges
execplan::ParseTree* filtersWithNewRange(execplan::SCSEP& csep, execplan::SimpleColumn& column,
@@ -201,13 +197,23 @@ std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyC
return std::nullopt;
}
} // namespace details
using namespace details;
// Rule filter for the parallel CES rewrite: the plan qualifies only when it
// references at least one foreign table and
// someForeignTablesHasStatisticsAndMbIndex() also accepts it.
// TODO filter out CSEPs with orderBy, groupBy, having || or clean up OB,GB,HAVING cloning CSEP
bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx)
{
  // Filter out tables that were re-written.
  if (!someAreForeignTables(csep))
  {
    return false;
  }
  return someForeignTablesHasStatisticsAndMbIndex(csep, ctx);
}
// Populates range bounds based on column statistics
// Returns optional with bounds if successful, nullopt otherwise
template <typename T>
std::optional<FilterRangeBounds<T>> populateRangeBounds(Histogram_json_hb* columnStatistics)
std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_hb* columnStatistics)
{
FilterRangeBounds<T> bounds;
details::FilterRangeBounds<T> bounds;
// TODO configurable parallel factor via session variable
// NB now histogram size is the way to control parallel factor with 16 being the maximum
@@ -248,7 +254,6 @@ std::optional<FilterRangeBounds<T>> populateRangeBounds(Histogram_json_hb* colum
return bounds;
}
// TODO char and other numerical types support
execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
execplan::CalpontSelectExecutionPlan& csep, execplan::CalpontSystemCatalog::TableAliasName& table,
@@ -306,11 +311,9 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
return unionVec;
}
execplan::SCSEP createDerivedTableFromTable(
execplan::CalpontSelectExecutionPlan& csep,
execplan::SCSEP createDerivedTableFromTable(execplan::CalpontSelectExecutionPlan& csep,
const execplan::CalpontSystemCatalog::TableAliasName& table,
const std::string& tableAlias,
optimizer::RBOptimizerContext& ctx)
const std::string& tableAlias, optimizer::RBOptimizerContext& ctx)
{
// Don't copy filters for this
auto derivedSCEP = csep.cloneForTableWORecursiveSelectsGbObHaving(table, false);
@@ -324,7 +327,8 @@ execplan::SCSEP createDerivedTableFromTable(
{
return execplan::SCSEP();
}
auto additionalUnionVec = makeUnionFromTable(*derivedCSEP, const_cast<execplan::CalpontSystemCatalog::TableAliasName&>(table), ctx);
auto additionalUnionVec = makeUnionFromTable(
*derivedCSEP, const_cast<execplan::CalpontSystemCatalog::TableAliasName&>(table), ctx);
// TODO add original alias to support multiple same name tables
derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM);
@@ -357,8 +361,8 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBO
// TODO add column statistics check to the corresponding match
if (!table.isColumnstore() && anyColumnStatistics)
{
std::string tableAlias = optimizer::RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" +
std::to_string(ctx.uniqueId);
std::string tableAlias = optimizer::RewrittenSubTableAliasPrefix + table.schema + "_" + table.table +
"_" + std::to_string(ctx.uniqueId);
tableAliasMap.insert({table, {tableAlias, 0}});
tableAliasToSCPositionsMap.insert({table, {tableAlias, {}, 0}});
execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, "");
@@ -409,11 +413,13 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBO
{
SCAliasToPosCounterMap.insert({sc->columnName(), currentColPosition++});
colPosition++;
std::cout << " first case new column in the map colPosition " << SCAliasToPosCounterMap[sc->columnName()] << std::endl;
std::cout << " first case new column in the map colPosition "
<< SCAliasToPosCounterMap[sc->columnName()] << std::endl;
}
else
{
std::cout << " first case reusing column from the map colPosition " << SCAliasToPosCounterMap[sc->columnName()] << std::endl;
std::cout << " first case reusing column from the map colPosition "
<< SCAliasToPosCounterMap[sc->columnName()] << std::endl;
}
assert(SCAliasToPosCounterMap[sc->columnName()] == colPosition - 1);
newSC->colPosition(SCAliasToPosCounterMap[sc->columnName()]);
@@ -457,11 +463,13 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBO
if (it == SCAliasToPosCounterMap.end())
{
SCAliasToPosCounterMap.insert({sc->columnName(), currentColPosition++});
std::cout << " 2nd case new column in the map colPosition " << SCAliasToPosCounterMap[sc->columnName()] << std::endl;
std::cout << " 2nd case new column in the map colPosition "
<< SCAliasToPosCounterMap[sc->columnName()] << std::endl;
}
else
{
std::cout << " 2nd case reusing column from the map colPosition " << SCAliasToPosCounterMap[sc->columnName()] << std::endl;
std::cout << " 2nd case reusing column from the map colPosition "
<< SCAliasToPosCounterMap[sc->columnName()] << std::endl;
}
assert(SCAliasToPosCounterMap[sc->columnName()] == colPosition);
sc->colPosition(SCAliasToPosCounterMap[sc->columnName()]);
@@ -488,7 +496,8 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBO
auto tableAliasToSCPositionsIt = tableAliasToSCPositionsMap.find(*sc->singleTable());
if (tableAliasToSCPositionsIt != tableAliasToSCPositionsMap.end())
{
auto& [newTableAlias, SCAliasToPosCounterMap, currentColPosition] = tableAliasToSCPositionsIt->second;
auto& [newTableAlias, SCAliasToPosCounterMap, currentColPosition] =
tableAliasToSCPositionsIt->second;
std::cout << " filters map colPosition " << SCAliasToPosCounterMap[sc->columnName()] << std::endl;
auto it = SCAliasToPosCounterMap.find(sc->columnName());
if (it == SCAliasToPosCounterMap.end())

View File

@@ -21,7 +21,11 @@
#include <my_config.h>
#include "idb_mysql.h"
#include <optional>
#include <vector>
#include "execplan/calpontselectexecutionplan.h"
#include "execplan/simplecolumn.h"
#include "rulebased_optimizer.h"
namespace optimizer
@@ -62,6 +66,33 @@ using TableAliasToNewAliasAndSCPositionsMap =
std::map<execplan::CalpontSystemCatalog::TableAliasName,
std::tuple<std::string, SCAliasToPosCounterMap, size_t>, TableAliasLessThan>;
// Helper functions in details namespace
namespace details
{
template <typename T>
using FilterRangeBounds = std::vector<std::pair<T, T>>;
bool tableIsInUnion(const execplan::CalpontSystemCatalog::TableAliasName& table,
execplan::CalpontSelectExecutionPlan& csep);
bool someAreForeignTables(execplan::CalpontSelectExecutionPlan& csep);
bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPlan& csep,
optimizer::RBOptimizerContext& ctx);
execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep,
execplan::CalpontSystemCatalog::TableAliasName& targetTable,
optimizer::RBOptimizerContext& ctx);
std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyColumnAndStatistics(
execplan::CalpontSystemCatalog::TableAliasName& targetTable, optimizer::RBOptimizerContext& ctx);
Histogram_json_hb* chooseStatisticsToUse(const std::vector<Histogram_json_hb*>& statisticsVec);
} // namespace details
// Main functions
bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
} // namespace optimizer

View File

@@ -49,11 +49,11 @@ bool optimizeCSEPWithRules(execplan::CalpontSelectExecutionPlan& root, const std
}
// high level API call for optimizer
bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, optimizer::RBOptimizerContext& ctx)
bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, optimizer::RBOptimizerContext& ctx,
bool useUnstableOptimizer)
{
std::vector<optimizer::Rule> rules;
if (get_unstable_optimizer(&ctx.thd))
if (useUnstableOptimizer)
{
optimizer::Rule parallelCES{"parallel_ces", optimizer::parallelCESFilter, optimizer::applyParallelCES};
rules.push_back(parallelCES);

View File

@@ -21,18 +21,23 @@
#define PREFER_MY_CONFIG_H
#include <my_config.h>
#include "idb_mysql.h"
// #include "idb_mysql.h"
#include "ha_mcs_impl_if.h"
#include "execplan/calpontselectexecutionplan.h"
namespace optimizer {
namespace optimizer
{
class RBOptimizerContext {
class RBOptimizerContext
{
public:
RBOptimizerContext() = delete;
RBOptimizerContext(cal_impl_if::gp_walk_info& walk_info, THD& thd, bool logRules) : gwi(walk_info), thd(thd), logRules(logRules) {}
RBOptimizerContext(cal_impl_if::gp_walk_info& walk_info, THD& thd, bool logRules)
: gwi(walk_info), thd(thd), logRules(logRules)
{
}
// gwi lifetime should be longer than optimizer context.
// In plugin runtime this is always true.
cal_impl_if::gp_walk_info& gwi;
@@ -73,5 +78,6 @@ struct Rule
bool walk(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) const;
};
bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, RBOptimizerContext& ctx);
}
bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, RBOptimizerContext& ctx,
bool useUnstableOptimizer);
} // namespace optimizer

6
dbcon/rbo/CMakeLists.txt Normal file
View File

@@ -0,0 +1,6 @@
# Rule-based optimizer (rbo) sources. The files still live in ../mysql and are
# only referenced from this directory.
set(rbo_SRCS ../mysql/rulebased_optimizer.cpp ../mysql/rbo_apply_parallel_ces.cpp ../mysql/rbo_predicate_pushdown.cpp)
# Declare the `rbo` target from those sources (columnstore_library is a project
# helper defined elsewhere; presumably a thin wrapper around add_library).
columnstore_library(rbo ${rbo_SRCS})
# PUBLIC: targets that link rbo also get ENGINE_COMMON_INCLUDES on their include path.
target_include_directories(rbo PUBLIC ${ENGINE_COMMON_INCLUDES})
# Link rbo against execplan (columnstore_link is a project helper defined elsewhere).
columnstore_link(rbo PUBLIC execplan)

View File

@@ -80,6 +80,7 @@ usr/lib/*/libmarias3.so
usr/lib/*/liboamcpp.so
usr/lib/*/libquerystats.so
usr/lib/*/libquerytele.so
usr/lib/*/librbo.so
usr/lib/*/libregr.so
usr/lib/*/librowgroup.so
usr/lib/*/librwlock.so

View File

@@ -98,6 +98,11 @@ if(WITH_UNITTESTS)
columnstore_link(poolallocator ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS} ${GTEST_LIBRARIES})
gtest_add_tests(TARGET poolallocator TEST_PREFIX columnstore:)
add_executable(rbo_tests rbo_hybrid.cpp)
add_dependencies(rbo_tests googletest)
columnstore_link(rbo_tests rbo ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${ENGINE_EXEC_LIBS})
gtest_add_tests(TARGET rbo_tests TEST_PREFIX columnstore:)
add_executable(stlpoolallocator stlpoolallocator.cpp)
target_compile_options(stlpoolallocator PRIVATE -Wno-sign-compare)
add_dependencies(stlpoolallocator googletest)

533
tests/rbo_hybrid.cpp Normal file
View File

@@ -0,0 +1,533 @@
/*
* Unit tests for RBO parallel rewrite functionality using hybrid real/mock approach
* Uses real MariaDB ColumnStore classes where practical, minimal mocks where system infrastructure is
* required
*/
#include <gtest/gtest.h>
#include <memory>
#include <vector>
#include <string>
#include "../dbcon/mysql/rbo_apply_parallel_ces.h"
#include "../dbcon/execplan/calpontselectexecutionplan.h"
#include "../dbcon/execplan/simplecolumn.h"
#include "../dbcon/mysql/ha_mcs_impl_if.h"
class RBOHybridTest : public ::testing::Test
{
protected:
void SetUp() override
{
}
void TearDown() override
{
}
// Mock SimpleColumn that doesn't require Syscat infrastructure
class MockSimpleColumn : public execplan::SimpleColumn
{
public:
MockSimpleColumn(const std::string& schema, const std::string& table, const std::string& column)
: execplan::SimpleColumn()
{
// Set basic properties without requiring Syscat
schemaName(schema);
tableName(table);
columnName(column);
// Note: OID and other system-dependent properties are not set
}
};
// Helper to create a mock SimpleColumn
boost::shared_ptr<MockSimpleColumn> createMockSimpleColumn(const std::string& schema,
const std::string& table,
const std::string& column)
{
return boost::shared_ptr<MockSimpleColumn>(new MockSimpleColumn(schema, table, column));
}
// Helper to create a real CalpontSelectExecutionPlan
boost::shared_ptr<execplan::CalpontSelectExecutionPlan> createCSEP()
{
return boost::shared_ptr<execplan::CalpontSelectExecutionPlan>(
new execplan::CalpontSelectExecutionPlan());
}
// Helper to create a real TableAliasName (this is just a struct, no system dependencies)
execplan::CalpontSystemCatalog::TableAliasName createTableAlias(const std::string& schema,
const std::string& table,
const std::string& alias = "",
bool isColumnStore = true)
{
execplan::CalpontSystemCatalog::TableAliasName tableAlias;
tableAlias.schema = schema;
tableAlias.table = table;
tableAlias.alias = alias.empty() ? table : alias;
tableAlias.view = "";
tableAlias.fisColumnStore = isColumnStore;
return tableAlias;
}
// Mock structures needed for RBOptimizerContext
struct MockTHD
{
// Minimal THD mock for testing
uint64_t thread_id = 1;
// Add other fields as needed
};
struct MockGatewayInfo
{
std::unordered_map<
cal_impl_if::SchemaAndTableName,
std::map<std::string, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>,
cal_impl_if::SchemaAndTableNameHash>
tableStatisticsMap;
// Helper method to find statistics for a table
std::map<std::string, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>*
findStatisticsForATable(const cal_impl_if::SchemaAndTableName& schemaAndTable)
{
auto it = tableStatisticsMap.find(schemaAndTable);
return (it != tableStatisticsMap.end()) ? &(it->second) : nullptr;
}
};
// Create a simplified mock approach since RBOptimizerContext is complex to mock properly
// We'll create a wrapper that provides the interface we need for testing
class MockRBOptimizerContextWrapper
{
private:
MockTHD mockTHD;
MockGatewayInfo mockGWI;
public:
MockRBOptimizerContextWrapper()
{
}
// Helper to add test statistics
void addTableStatistics(const std::string& schema, const std::string& table, const std::string& column,
Histogram_json_hb* histogram)
{
cal_impl_if::SchemaAndTableName schemaAndTable = {schema, table};
execplan::SimpleColumn simpleCol; // Mock column
std::vector<Histogram_json_hb*> histograms = {histogram};
mockGWI.tableStatisticsMap[schemaAndTable][column] = std::make_pair(simpleCol, histograms);
}
// Get the mock gateway info for testing helper functions
MockGatewayInfo& getGWI()
{
return mockGWI;
}
};
// Helper to create a mock optimizer context
std::unique_ptr<MockRBOptimizerContextWrapper> createMockOptimizerContext()
{
return std::make_unique<MockRBOptimizerContextWrapper>();
}
// Mock histogram for testing (Histogram_json_hb is final, so we can't inherit)
// We'll use a simple wrapper approach instead
struct MockHistogramData
{
std::vector<uint32_t> testValues;
MockHistogramData(const std::vector<uint32_t>& values) : testValues(values)
{
}
};
boost::shared_ptr<MockHistogramData> createMockHistogram(const std::vector<uint32_t>& values)
{
return boost::shared_ptr<MockHistogramData>(new MockHistogramData(values));
}
};
// someAreForeignTables on a real CSEP filled with real TableAliasName entries:
// false for an empty plan and for a ColumnStore-only plan, true once a
// non-ColumnStore table is added.
TEST_F(RBOHybridTest, HelperFunctionsWithRealStructures)
{
  namespace rbo = optimizer::details;

  auto plan = createCSEP();

  // Empty table list.
  EXPECT_FALSE(rbo::someAreForeignTables(*plan));

  execplan::CalpontSelectExecutionPlan::TableList tableList;

  // A single ColumnStore table.
  tableList.push_back(createTableAlias("test_schema", "cs_table", "", true));
  plan->tableList(tableList);
  EXPECT_FALSE(rbo::someAreForeignTables(*plan));

  // ColumnStore table plus a foreign table.
  tableList.push_back(createTableAlias("test_schema", "foreign_table", "", false));
  plan->tableList(tableList);
  EXPECT_TRUE(rbo::someAreForeignTables(*plan));
}
// tableIsInUnion on a real CSEP: false without unions, true for a table listed
// by a union branch, false for a table no branch lists.
TEST_F(RBOHybridTest, TableIsInUnionWithRealStructures)
{
  namespace rbo = optimizer::details;

  auto outerPlan = createCSEP();
  auto probeTable = createTableAlias("test_schema", "test_table");

  // The plan has no unions yet.
  EXPECT_FALSE(rbo::tableIsInUnion(probeTable, *outerPlan));

  // Build one union branch (a real CSEP) that lists the probe table.
  auto unionBranch = createCSEP();
  execplan::CalpontSelectExecutionPlan::TableList branchTables;
  branchTables.push_back(probeTable);
  unionBranch->tableList(branchTables);

  // Attach the branch to the outer plan through its base-class pointer.
  execplan::CalpontSelectExecutionPlan::SelectList unionBranches;
  unionBranches.push_back(boost::shared_ptr<execplan::CalpontExecutionPlan>(unionBranch));
  outerPlan->unionVec(unionBranches);

  // Listed by the branch.
  EXPECT_TRUE(rbo::tableIsInUnion(probeTable, *outerPlan));

  // Not listed by any branch.
  auto absentTable = createTableAlias("test_schema", "other_table");
  EXPECT_FALSE(rbo::tableIsInUnion(absentTable, *outerPlan));
}
// createTableAlias fills every TableAliasName field it is given and falls back
// to the table name when no alias is supplied.
TEST_F(RBOHybridTest, RealTableAliasNameBasics)
{
  // Explicit alias, ColumnStore table.
  auto withAlias = createTableAlias("schema1", "table1", "alias1", true);
  EXPECT_EQ("schema1", withAlias.schema);
  EXPECT_EQ("table1", withAlias.table);
  EXPECT_EQ("alias1", withAlias.alias);
  EXPECT_TRUE(withAlias.fisColumnStore);

  // Empty alias, foreign table.
  auto withoutAlias = createTableAlias("schema2", "table2", "", false);
  EXPECT_EQ("schema2", withoutAlias.schema);
  EXPECT_EQ("table2", withoutAlias.table);
  EXPECT_EQ("table2", withoutAlias.alias);  // alias defaults to the table name
  EXPECT_FALSE(withoutAlias.fisColumnStore);
}
// A table list stored in a real CSEP is returned with the same size, order and
// schema/table names.
TEST_F(RBOHybridTest, RealCSEPBasics)
{
  auto plan = createCSEP();

  execplan::CalpontSelectExecutionPlan::TableList inputTables;
  inputTables.push_back(createTableAlias("schema1", "table1"));
  inputTables.push_back(createTableAlias("schema2", "table2"));
  plan->tableList(inputTables);

  const auto& storedTables = plan->tableList();
  EXPECT_EQ(2u, storedTables.size());

  const auto& first = storedTables[0];
  EXPECT_EQ("schema1", first.schema);
  EXPECT_EQ("table1", first.table);

  const auto& second = storedTables[1];
  EXPECT_EQ("schema2", second.schema);
  EXPECT_EQ("table2", second.table);
}
// MockSimpleColumn reports the names it was built with and accepts new ones
// through the setters (a mock is used because the real column needs Syscat).
TEST_F(RBOHybridTest, MockSimpleColumnBasics)
{
  auto mockColumn = createMockSimpleColumn("test_schema", "test_table", "test_column");

  // Names supplied at construction.
  EXPECT_EQ("test_schema", mockColumn->schemaName());
  EXPECT_EQ("test_table", mockColumn->tableName());
  EXPECT_EQ("test_column", mockColumn->columnName());

  // Overwrite every name, then read them all back.
  mockColumn->schemaName("new_schema");
  mockColumn->tableName("new_table");
  mockColumn->columnName("new_column");

  EXPECT_EQ("new_schema", mockColumn->schemaName());
  EXPECT_EQ("new_table", mockColumn->tableName());
  EXPECT_EQ("new_column", mockColumn->columnName());
}
// Mock columns stored as returned columns of a real CSEP come back with the
// same count, and the first one can be downcast to MockSimpleColumn again.
TEST_F(RBOHybridTest, CSEPWithMockColumns)
{
  auto plan = createCSEP();

  auto firstColumn = createMockSimpleColumn("schema1", "table1", "col1");
  auto secondColumn = createMockSimpleColumn("schema2", "table2", "col2");

  // The plan stores columns through their ReturnedColumn base pointer.
  execplan::CalpontSelectExecutionPlan::ReturnedColumnList returnedColumns;
  returnedColumns.push_back(boost::shared_ptr<execplan::ReturnedColumn>(firstColumn));
  returnedColumns.push_back(boost::shared_ptr<execplan::ReturnedColumn>(secondColumn));
  plan->returnedCols(returnedColumns);

  const auto& storedColumns = plan->returnedCols();
  EXPECT_EQ(2u, storedColumns.size());

  // Downcast the first stored column back to the mock type.
  auto recovered = boost::dynamic_pointer_cast<MockSimpleColumn>(storedColumns[0]);
  EXPECT_NE(nullptr, recovered);
  if (!recovered)
  {
    // The failure is already recorded above; skip the dereferences.
    return;
  }
  EXPECT_EQ("schema1", recovered->schemaName());
  EXPECT_EQ("table1", recovered->tableName());
  EXPECT_EQ("col1", recovered->columnName());
}
// Helper check on a mixed ColumnStore/foreign table list while a mock
// optimizer context exists. The context is created but not consulted, because
// someAreForeignTables takes no context.
TEST_F(RBOHybridTest, HelperFunctionsWithMockContext)
{
  namespace rbo = optimizer::details;

  auto plan = createCSEP();
  auto mockCtx = createMockOptimizerContext();

  // One ColumnStore table and one foreign table.
  execplan::CalpontSelectExecutionPlan::TableList mixedTables;
  mixedTables.push_back(createTableAlias("test_schema", "cs_table", "", true));
  mixedTables.push_back(createTableAlias("test_schema", "foreign_table", "", false));
  plan->tableList(mixedTables);

  EXPECT_TRUE(rbo::someAreForeignTables(*plan));

  // Note: helpers that take a real RBOptimizerContext need actual system
  // infrastructure or more sophisticated mocking to be tested.
}
// Edge cases on real structures: an empty plan, an all-empty table alias, and
// two aliases that differ in both schema and table.
TEST_F(RBOHybridTest, EdgeCasesWithRealStructures)
{
  namespace rbo = optimizer::details;

  // A plan without tables has no foreign tables.
  auto blankPlan = createCSEP();
  EXPECT_FALSE(rbo::someAreForeignTables(*blankPlan));

  // An alias built from empty strings keeps schema and table empty.
  auto blankAlias = createTableAlias("", "", "");
  EXPECT_TRUE(blankAlias.schema.empty());
  EXPECT_TRUE(blankAlias.table.empty());

  // Field-level comparison only; TableAliasLessThan is not exercised here.
  auto left = createTableAlias("schema1", "table1");
  auto right = createTableAlias("schema2", "table2");
  EXPECT_NE(left.schema, right.schema);
  EXPECT_NE(left.table, right.table);
}
// Covers the first condition of parallelCESFilter (someAreForeignTables) for
// three table mixes. The filter itself is not called because it takes a real
// RBOptimizerContext.
TEST_F(RBOHybridTest, ParallelCESFilterLogicTesting)
{
  namespace rbo = optimizer::details;
  using TableList = execplan::CalpontSelectExecutionPlan::TableList;

  auto plan = createCSEP();
  auto columnstoreA = createTableAlias("test_schema", "cs_table1", "", true);

  // Case 1: ColumnStore tables only -> no foreign tables.
  TableList columnstoreOnly;
  columnstoreOnly.push_back(columnstoreA);
  columnstoreOnly.push_back(createTableAlias("test_schema", "cs_table2", "", true));
  plan->tableList(columnstoreOnly);
  EXPECT_FALSE(rbo::someAreForeignTables(*plan));

  // Case 2: one ColumnStore table and one foreign table.
  TableList mixed;
  mixed.push_back(columnstoreA);
  mixed.push_back(createTableAlias("test_schema", "foreign_table", "", false));
  plan->tableList(mixed);
  EXPECT_TRUE(rbo::someAreForeignTables(*plan));

  // Case 3: foreign tables only.
  TableList foreignOnly;
  foreignOnly.push_back(createTableAlias("test_schema", "foreign_table1", "", false));
  foreignOnly.push_back(createTableAlias("test_schema", "foreign_table2", "", false));
  plan->tableList(foreignOnly);
  EXPECT_TRUE(rbo::someAreForeignTables(*plan));

  // Note: the full parallelCESFilter needs a real RBOptimizerContext with
  // statistics set up, which is not feasible without system infrastructure.
}
// Builds a plan shaped like a candidate for the parallel rewrite (one
// ColumnStore table, one foreign table, one returned column, no unions) and
// checks that the plan reports exactly that. applyParallelCES is not called.
TEST_F(RBOHybridTest, ApplyParallelCESPrerequisites)
{
  namespace rbo = optimizer::details;

  auto plan = createCSEP();

  // Tables: ColumnStore first, foreign second.
  execplan::CalpontSelectExecutionPlan::TableList planTables;
  planTables.push_back(createTableAlias("test_schema", "cs_table", "", true));
  planTables.push_back(createTableAlias("test_schema", "foreign_table", "", false));
  plan->tableList(planTables);

  // One mock column that belongs to the foreign table.
  execplan::CalpontSelectExecutionPlan::ReturnedColumnList planColumns;
  auto idColumn = createMockSimpleColumn("test_schema", "foreign_table", "id");
  planColumns.push_back(boost::shared_ptr<execplan::ReturnedColumn>(idColumn));
  plan->returnedCols(planColumns);

  // Basic shape expected of a rewrite candidate.
  EXPECT_TRUE(rbo::someAreForeignTables(*plan));  // Has foreign tables
  EXPECT_EQ(2u, plan->tableList().size());        // Has multiple tables
  EXPECT_EQ(1u, plan->returnedCols().size());     // Has columns
  EXPECT_EQ(0u, plan->unionVec().size());         // No existing unions

  // Table kinds are kept in insertion order.
  const auto& storedTables = plan->tableList();
  EXPECT_TRUE(storedTables[0].fisColumnStore);   // cs_table
  EXPECT_FALSE(storedTables[1].fisColumnStore);  // foreign_table

  // The stored column can be downcast to the mock type again.
  const auto& storedColumns = plan->returnedCols();
  auto recovered = boost::dynamic_pointer_cast<MockSimpleColumn>(storedColumns[0]);
  EXPECT_NE(nullptr, recovered);
  if (recovered)
  {
    EXPECT_EQ("test_schema", recovered->schemaName());
    EXPECT_EQ("foreign_table", recovered->tableName());
    EXPECT_EQ("id", recovered->columnName());
  }

  // Note: applyParallelCES itself needs a real RBOptimizerContext with
  // statistics set up, which is not feasible without system infrastructure.
}
// Edge cases for the helpers behind parallelCESFilter: an empty plan, a
// foreign-only plan, and a plan whose union branch lists a different table
// than the main plan.
TEST_F(RBOHybridTest, ParallelCESFilterEdgeCases)
{
  namespace rbo = optimizer::details;
  using TableList = execplan::CalpontSelectExecutionPlan::TableList;

  // Case 1: empty plan -> no foreign tables.
  auto blankPlan = createCSEP();
  EXPECT_FALSE(rbo::someAreForeignTables(*blankPlan));

  // Case 2: foreign tables only (no ColumnStore tables).
  auto foreignPlan = createCSEP();
  TableList foreignList;
  foreignList.push_back(createTableAlias("test_schema", "foreign_table1", "", false));
  foreignList.push_back(createTableAlias("test_schema", "foreign_table2", "", false));
  foreignPlan->tableList(foreignList);
  EXPECT_TRUE(rbo::someAreForeignTables(*foreignPlan));

  // Case 3: the main plan lists main_table, its union branch lists union_table.
  auto planWithUnion = createCSEP();
  auto mainTable = createTableAlias("test_schema", "main_table", "", false);
  TableList mainList;
  mainList.push_back(mainTable);
  planWithUnion->tableList(mainList);

  auto branchPlan = createCSEP();
  auto unionTable = createTableAlias("test_schema", "union_table", "", true);
  TableList branchList;
  branchList.push_back(unionTable);
  branchPlan->tableList(branchList);

  execplan::CalpontSelectExecutionPlan::SelectList branches;
  branches.push_back(boost::shared_ptr<execplan::CalpontExecutionPlan>(branchPlan));
  planWithUnion->unionVec(branches);

  // Only the branch's table counts as being in the union.
  EXPECT_TRUE(rbo::tableIsInUnion(unionTable, *planWithUnion));
  EXPECT_FALSE(rbo::tableIsInUnion(mainTable, *planWithUnion));
}
// Test applyParallelCES scenarios through data structure validation.
// applyParallelCES itself is not invoked here; each scenario only builds a plan and checks
// what optimizer::details::someAreForeignTables and the plan accessors report for it.
TEST_F(RBOHybridTest, ApplyParallelCESScenarios)
{
  // Scenario 1: Query that should not be rewritten (all ColumnStore tables)
  auto csOnlyCSEP = createCSEP();
  execplan::CalpontSelectExecutionPlan::TableList csTables;
  auto csTable1 = createTableAlias("test_schema", "cs_table1", "", true);
  auto csTable2 = createTableAlias("test_schema", "cs_table2", "", true);
  csTables.push_back(csTable1);
  csTables.push_back(csTable2);
  csOnlyCSEP->tableList(csTables);

  // Should not apply parallel rewrite
  EXPECT_FALSE(optimizer::details::someAreForeignTables(*csOnlyCSEP));

  // Scenario 2: Query with foreign tables but no statistics
  auto noStatsCSEP = createCSEP();
  execplan::CalpontSelectExecutionPlan::TableList mixedTables;
  auto foreignTable = createTableAlias("test_schema", "foreign_table", "", false);
  mixedTables.push_back(csTable1);  // reuses the ColumnStore alias from scenario 1
  mixedTables.push_back(foreignTable);
  noStatsCSEP->tableList(mixedTables);

  // Has foreign tables but no statistics in mock context
  EXPECT_TRUE(optimizer::details::someAreForeignTables(*noStatsCSEP));

  // Scenario 3: Complex query with multiple foreign tables and columns
  auto complexCSEP = createCSEP();
  execplan::CalpontSelectExecutionPlan::TableList complexTables;
  auto foreignTable1 = createTableAlias("schema1", "foreign_table1", "ft1", false);
  auto foreignTable2 = createTableAlias("schema2", "foreign_table2", "ft2", false);
  auto csTable = createTableAlias("schema1", "cs_table", "ct", true);
  complexTables.push_back(foreignTable1);
  complexTables.push_back(foreignTable2);
  complexTables.push_back(csTable);
  complexCSEP->tableList(complexTables);

  // Add multiple columns
  execplan::CalpontSelectExecutionPlan::ReturnedColumnList complexCols;
  auto col1 = createMockSimpleColumn("schema1", "foreign_table1", "id");
  auto col2 = createMockSimpleColumn("schema1", "foreign_table1", "name");
  auto col3 = createMockSimpleColumn("schema2", "foreign_table2", "value");
  auto col4 = createMockSimpleColumn("schema1", "cs_table", "cs_id");
  boost::shared_ptr<execplan::ReturnedColumn> col1Base = col1;
  boost::shared_ptr<execplan::ReturnedColumn> col2Base = col2;
  boost::shared_ptr<execplan::ReturnedColumn> col3Base = col3;
  boost::shared_ptr<execplan::ReturnedColumn> col4Base = col4;
  complexCols.push_back(col1Base);
  complexCols.push_back(col2Base);
  complexCols.push_back(col3Base);
  complexCols.push_back(col4Base);
  complexCSEP->returnedCols(complexCols);

  // Verify the setup
  EXPECT_TRUE(optimizer::details::someAreForeignTables(*complexCSEP));
  EXPECT_EQ(4u, complexCSEP->returnedCols().size());
  // Fatal size check: the table list is indexed at [0]..[2] right below, so continuing after a
  // mismatch would read out of range.
  ASSERT_EQ(3u, complexCSEP->tableList().size());

  // Test that we can identify foreign vs ColumnStore tables
  const auto& retrievedTables = complexCSEP->tableList();
  EXPECT_FALSE(retrievedTables[0].fisColumnStore);  // foreign_table1
  EXPECT_FALSE(retrievedTables[1].fisColumnStore);  // foreign_table2
  EXPECT_TRUE(retrievedTables[2].fisColumnStore);   // cs_table
}