diff --git a/CMakeLists.txt b/CMakeLists.txt index b4fb6e1e2..257b27bfa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,18 +62,13 @@ columnstore_install_file(${CMAKE_CURRENT_BINARY_DIR}/gitversionEngine ${ENGINE_S set(COMPONENTS utils oam/oamcpp - dbcon/execplan - dbcon/joblist + dbcon versioning oam writeengine/wrapper writeengine/client writeengine/xml writeengine/redistribute - dbcon/ddlpackage - dbcon/ddlpackageproc - dbcon/dmlpackage - dbcon/dmlpackageproc ddlproc dmlproc oamapps diff --git a/build/bootstrap_mcs.sh b/build/bootstrap_mcs.sh index 73b835417..10a6c79d6 100755 --- a/build/bootstrap_mcs.sh +++ b/build/bootstrap_mcs.sh @@ -572,7 +572,7 @@ build_binary() { message "Configuring cmake silently" ${CMAKE_BIN_NAME} "${MDB_CMAKE_FLAGS[@]}" -S"$MDB_SOURCE_PATH" -B"$MARIA_BUILD_PATH" | spinner message_split - # check_debian_install_file // will be uncommented later + check_debian_install_file generate_svgs ${CMAKE_BIN_NAME} --build "$MARIA_BUILD_PATH" -j "$CPUS" | onelinearizator diff --git a/dbcon/CMakeLists.txt b/dbcon/CMakeLists.txt index 8640f0e7a..d2fbb67bc 100644 --- a/dbcon/CMakeLists.txt +++ b/dbcon/CMakeLists.txt @@ -1,7 +1,8 @@ +# order is important dbcon/mysql is added in the main CMakeLists.txt +add_subdirectory(execplan) +add_subdirectory(joblist) +add_subdirectory(rbo) add_subdirectory(ddlpackage) add_subdirectory(ddlpackageproc) add_subdirectory(dmlpackage) add_subdirectory(dmlpackageproc) -add_subdirectory(execplan) -add_subdirectory(joblist) -add_subdirectory(mysql) diff --git a/dbcon/mysql/CMakeLists.txt b/dbcon/mysql/CMakeLists.txt index 8e6017445..1e03b113e 100644 --- a/dbcon/mysql/CMakeLists.txt +++ b/dbcon/mysql/CMakeLists.txt @@ -42,9 +42,6 @@ set(libcalmysql_SRCS is_columnstore_files.cpp is_columnstore_extents.cpp columnstore_dataload.cpp - rulebased_optimizer.cpp - rbo_apply_parallel_ces.cpp - rbo_predicate_pushdown.cpp ) set_source_files_properties(ha_mcs.cpp PROPERTIES COMPILE_FLAGS "-fno-implicit-templates") @@ -61,6 +58,7 
@@ if(COMMAND mysql_add_plugin) MODULE_ONLY ${disabled} LINK_LIBRARIES + rbo ${ENGINE_LDFLAGS} ${PLUGIN_EXEC_LIBS} ${PLUGIN_WRITE_LIBS} @@ -88,6 +86,7 @@ else() columnstore_link( ha_columnstore + rbo ${S3API_DEPS} ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS} diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index e143a38ec..44f9378d1 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -5222,7 +5222,7 @@ void extractColumnStatistics(TABLE_LIST* table_ptr, gp_walk_info& gwi) auto* histogram = dynamic_cast(field->read_stats->histogram); if (histogram) { - std::cout << " has stats with " << histogram->buckets.size() << " buckets"; + std::cout << " has stats with " << histogram->get_json_histogram().size() << " buckets"; SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str}; auto sc = std::unique_ptr(buildSimpleColumnFromFieldForStatistics(field, gwi)); @@ -7596,12 +7596,11 @@ int cs_get_select_plan(ha_columnstore_select_handler* handler, THD* thd, SCSEP& // Derived table projection list optimization. derivedTableOptimization(&gwi, csep); - { optimizer::RBOptimizerContext ctx(gwi, *thd, csep->traceOn()); // TODO RBO can crash or fail leaving CSEP in an invalid state, so there must be a valid CSEP copy // TBD There is a tradeoff b/w copy per rule and copy per optimizer run. 
- bool csepWasOptimized = optimizer::optimizeCSEP(*csep, ctx); + bool csepWasOptimized = optimizer::optimizeCSEP(*csep, ctx, get_unstable_optimizer(&ctx.thd)); if (csep->traceOn() && csepWasOptimized) { cerr << "---------------- cs_get_select_plan optimized EXECUTION PLAN ----------------" << endl; diff --git a/dbcon/mysql/ha_mcs_impl.cpp b/dbcon/mysql/ha_mcs_impl.cpp index 88500a216..7a571596b 100644 --- a/dbcon/mysql/ha_mcs_impl.cpp +++ b/dbcon/mysql/ha_mcs_impl.cpp @@ -156,24 +156,6 @@ void gp_walk_info::mergeTableStatistics(const TableStatisticsMap& aTableStatisti } } -std::optional gp_walk_info::findStatisticsForATable( - SchemaAndTableName& schemaAndTableName) -{ - auto tableStatisticsMapIt = tableStatisticsMap.find(schemaAndTableName); - for (auto& [schemaAndTableName, columnStatisticsMap] : tableStatisticsMap) - { - std::cout << "Table " << schemaAndTableName.schema << "." << schemaAndTableName.table - << " has statistics " << columnStatisticsMap.size() << std::endl; - } - - if (tableStatisticsMapIt == tableStatisticsMap.end()) - { - return std::nullopt; - } - - return {tableStatisticsMapIt->second}; -} - } // namespace cal_impl_if namespace diff --git a/dbcon/mysql/ha_mcs_impl_if.h b/dbcon/mysql/ha_mcs_impl_if.h index 13d901ce4..778639907 100644 --- a/dbcon/mysql/ha_mcs_impl_if.h +++ b/dbcon/mysql/ha_mcs_impl_if.h @@ -95,16 +95,20 @@ enum ClauseType ORDER_BY }; -struct SchemaAndTableName { +struct SchemaAndTableName +{ std::string schema; std::string table; - bool operator==(const SchemaAndTableName& other) const { + bool operator==(const SchemaAndTableName& other) const + { return schema == other.schema && table == other.table; } }; -struct SchemaAndTableNameHash { - std::size_t operator()(const SchemaAndTableName& k) const { +struct SchemaAndTableNameHash +{ + std::size_t operator()(const SchemaAndTableName& k) const + { return std::hash()(k.schema + k.table); } }; @@ -116,8 +120,10 @@ typedef std::map> TableOnExprList; typedef std::tr1::unordered_map 
TableOuterJoinMap; using ColumnName = std::string; -using ColumnStatisticsMap = std::unordered_map>>; -using TableStatisticsMap = std::unordered_map; +using ColumnStatisticsMap = + std::unordered_map>>; +using TableStatisticsMap = + std::unordered_map; // This structure is used to store MDB AST -> CSEP translation context. // There is a column statistics for some columns in a query. @@ -257,7 +263,22 @@ struct gp_walk_info ~gp_walk_info(); void mergeTableStatistics(const TableStatisticsMap& tableStatisticsMap); - std::optional findStatisticsForATable(SchemaAndTableName& schemaAndTableName); + std::optional findStatisticsForATable(SchemaAndTableName& schemaAndTableName) + { + auto tableStatisticsMapIt = tableStatisticsMap.find(schemaAndTableName); + for (auto& [schemaAndTableName, columnStatisticsMap] : tableStatisticsMap) + { + std::cout << "Table " << schemaAndTableName.schema << "." << schemaAndTableName.table + << " has statistics " << columnStatisticsMap.size() << std::endl; + } + + if (tableStatisticsMapIt == tableStatisticsMap.end()) + { + return std::nullopt; + } + + return {tableStatisticsMapIt->second}; + } }; struct SubQueryChainHolder; diff --git a/dbcon/mysql/rbo_apply_parallel_ces.cpp b/dbcon/mysql/rbo_apply_parallel_ces.cpp index e7a631042..fb69f4d2a 100644 --- a/dbcon/mysql/rbo_apply_parallel_ces.cpp +++ b/dbcon/mysql/rbo_apply_parallel_ces.cpp @@ -43,6 +43,9 @@ void applyParallelCES_exists(execplan::CalpontSelectExecutionPlan& csep, const s static const std::string RewrittenSubTableAliasPrefix = "$added_sub_"; static const size_t MaxParallelFactor = 16; +namespace details +{ + bool tableIsInUnion(const execplan::CalpontSystemCatalog::TableAliasName& table, execplan::CalpontSelectExecutionPlan& csep) { @@ -77,13 +80,6 @@ bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPl }); } -bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) -{ - // TODO filter out CSEPs with 
orderBy, groupBy, having || or clean up OB,GB,HAVING cloning CSEP - // Filter out tables that were re-written. - return someAreForeignTables(csep) && someForeignTablesHasStatisticsAndMbIndex(csep, ctx); -} - // This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand) // TODO add engine-independent statistics-derived ranges execplan::ParseTree* filtersWithNewRange(execplan::SCSEP& csep, execplan::SimpleColumn& column, @@ -201,13 +197,23 @@ std::optional> chooseKeyC return std::nullopt; } +} // namespace details + +using namespace details; + +bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) +{ + // TODO filter out CSEPs with orderBy, groupBy, having || or clean up OB,GB,HAVING cloning CSEP + // Filter out tables that were re-written. + return someAreForeignTables(csep) && someForeignTablesHasStatisticsAndMbIndex(csep, ctx); +} // Populates range bounds based on column statistics // Returns optional with bounds if successful, nullopt otherwise template -std::optional> populateRangeBounds(Histogram_json_hb* columnStatistics) +std::optional> populateRangeBounds(Histogram_json_hb* columnStatistics) { - FilterRangeBounds bounds; + details::FilterRangeBounds bounds; // TODO configurable parallel factor via session variable // NB now histogram size is the way to control parallel factor with 16 being the maximum @@ -234,7 +240,7 @@ std::optional> populateRangeBounds(Histogram_json_hb* colum T currentLowerBound = *(uint32_t*)bucket.start_value.data(); std::cout << "Bucket: " << currentLowerBound << std::endl; } - // TODO leave this here b/c there is a corresponding JIRA about the last upper range bound. + // TODO leave this here b/c there is a corresponding JIRA about the last upper range bound. 
// auto penultimateBucket = columnStatistics.get_json_histogram().begin() + numberOfUnionUnits * // numberOfBucketsPerUnionUnit; T currentLowerBound = *(uint32_t*)penultimateBucket->start_value.data(); T // currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data(); @@ -248,11 +254,10 @@ std::optional> populateRangeBounds(Histogram_json_hb* colum return bounds; } - // TODO char and other numerical types support execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( - execplan::CalpontSelectExecutionPlan& csep, execplan::CalpontSystemCatalog::TableAliasName& table, - optimizer::RBOptimizerContext& ctx) + execplan::CalpontSelectExecutionPlan& csep, execplan::CalpontSystemCatalog::TableAliasName& table, + optimizer::RBOptimizerContext& ctx) { execplan::CalpontSelectExecutionPlan::SelectList unionVec; @@ -306,11 +311,9 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( return unionVec; } -execplan::SCSEP createDerivedTableFromTable( - execplan::CalpontSelectExecutionPlan& csep, - const execplan::CalpontSystemCatalog::TableAliasName& table, - const std::string& tableAlias, - optimizer::RBOptimizerContext& ctx) +execplan::SCSEP createDerivedTableFromTable(execplan::CalpontSelectExecutionPlan& csep, + const execplan::CalpontSystemCatalog::TableAliasName& table, + const std::string& tableAlias, optimizer::RBOptimizerContext& ctx) { // Don't copy filters for this auto derivedSCEP = csep.cloneForTableWORecursiveSelectsGbObHaving(table, false); @@ -324,7 +327,8 @@ execplan::SCSEP createDerivedTableFromTable( { return execplan::SCSEP(); } - auto additionalUnionVec = makeUnionFromTable(*derivedCSEP, const_cast(table), ctx); + auto additionalUnionVec = makeUnionFromTable( + *derivedCSEP, const_cast(table), ctx); // TODO add original alias to support multiple same name tables derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM); @@ -357,8 +361,8 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& 
csep, optimizer::RBO // TODO add column statistics check to the corresponding match if (!table.isColumnstore() && anyColumnStatistics) { - std::string tableAlias = optimizer::RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" + - std::to_string(ctx.uniqueId); + std::string tableAlias = optimizer::RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + + "_" + std::to_string(ctx.uniqueId); tableAliasMap.insert({table, {tableAlias, 0}}); tableAliasToSCPositionsMap.insert({table, {tableAlias, {}, 0}}); execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, ""); @@ -409,21 +413,23 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBO { SCAliasToPosCounterMap.insert({sc->columnName(), currentColPosition++}); colPosition++; - std::cout << " first case new column in the map colPosition " << SCAliasToPosCounterMap[sc->columnName()] << std::endl; + std::cout << " first case new column in the map colPosition " + << SCAliasToPosCounterMap[sc->columnName()] << std::endl; } else { - std::cout << " first case reusing column from the map colPosition " << SCAliasToPosCounterMap[sc->columnName()] << std::endl; + std::cout << " first case reusing column from the map colPosition " + << SCAliasToPosCounterMap[sc->columnName()] << std::endl; } - assert(SCAliasToPosCounterMap[sc->columnName()] == colPosition-1); + assert(SCAliasToPosCounterMap[sc->columnName()] == colPosition - 1); newSC->colPosition(SCAliasToPosCounterMap[sc->columnName()]); sc->derivedTable(newTableAlias); } - else + else { newSC->colPosition(colPosition++); } - + newReturnedColumns.push_back(newSC); } // RC doesn't belong to any of the new derived tables @@ -457,11 +463,13 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBO if (it == SCAliasToPosCounterMap.end()) { SCAliasToPosCounterMap.insert({sc->columnName(), currentColPosition++}); - std::cout << " 2nd case new column in the map 
colPosition " << SCAliasToPosCounterMap[sc->columnName()] << std::endl; + std::cout << " 2nd case new column in the map colPosition " + << SCAliasToPosCounterMap[sc->columnName()] << std::endl; } else { - std::cout << " 2nd case reusing column from the map colPosition " << SCAliasToPosCounterMap[sc->columnName()] << std::endl; + std::cout << " 2nd case reusing column from the map colPosition " + << SCAliasToPosCounterMap[sc->columnName()] << std::endl; } assert(SCAliasToPosCounterMap[sc->columnName()] == colPosition); sc->colPosition(SCAliasToPosCounterMap[sc->columnName()]); @@ -488,7 +496,8 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBO auto tableAliasToSCPositionsIt = tableAliasToSCPositionsMap.find(*sc->singleTable()); if (tableAliasToSCPositionsIt != tableAliasToSCPositionsMap.end()) { - auto& [newTableAlias, SCAliasToPosCounterMap, currentColPosition] = tableAliasToSCPositionsIt->second; + auto& [newTableAlias, SCAliasToPosCounterMap, currentColPosition] = + tableAliasToSCPositionsIt->second; std::cout << " filters map colPosition " << SCAliasToPosCounterMap[sc->columnName()] << std::endl; auto it = SCAliasToPosCounterMap.find(sc->columnName()); if (it == SCAliasToPosCounterMap.end()) diff --git a/dbcon/mysql/rbo_apply_parallel_ces.h b/dbcon/mysql/rbo_apply_parallel_ces.h index 5c8589b1b..5261b0c41 100644 --- a/dbcon/mysql/rbo_apply_parallel_ces.h +++ b/dbcon/mysql/rbo_apply_parallel_ces.h @@ -21,7 +21,11 @@ #include #include "idb_mysql.h" +#include +#include + #include "execplan/calpontselectexecutionplan.h" +#include "execplan/simplecolumn.h" #include "rulebased_optimizer.h" namespace optimizer @@ -62,6 +66,33 @@ using TableAliasToNewAliasAndSCPositionsMap = std::map, TableAliasLessThan>; +// Helper functions in details namespace +namespace details +{ + +template +using FilterRangeBounds = std::vector>; + +bool tableIsInUnion(const execplan::CalpontSystemCatalog::TableAliasName& table, + 
execplan::CalpontSelectExecutionPlan& csep); + +bool someAreForeignTables(execplan::CalpontSelectExecutionPlan& csep); + +bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPlan& csep, + optimizer::RBOptimizerContext& ctx); + +execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep, + execplan::CalpontSystemCatalog::TableAliasName& targetTable, + optimizer::RBOptimizerContext& ctx); + +std::optional> chooseKeyColumnAndStatistics( + execplan::CalpontSystemCatalog::TableAliasName& targetTable, optimizer::RBOptimizerContext& ctx); + +Histogram_json_hb* chooseStatisticsToUse(const std::vector& statisticsVec); + +} // namespace details + +// Main functions bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx); bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx); } // namespace optimizer \ No newline at end of file diff --git a/dbcon/mysql/rulebased_optimizer.cpp b/dbcon/mysql/rulebased_optimizer.cpp index cb05afb05..73b229078 100644 --- a/dbcon/mysql/rulebased_optimizer.cpp +++ b/dbcon/mysql/rulebased_optimizer.cpp @@ -49,11 +49,11 @@ bool optimizeCSEPWithRules(execplan::CalpontSelectExecutionPlan& root, const std } // high level API call for optimizer -bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, optimizer::RBOptimizerContext& ctx) +bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, optimizer::RBOptimizerContext& ctx, + bool useUnstableOptimizer) { std::vector rules; - - if (get_unstable_optimizer(&ctx.thd)) + if (useUnstableOptimizer) { optimizer::Rule parallelCES{"parallel_ces", optimizer::parallelCESFilter, optimizer::applyParallelCES}; rules.push_back(parallelCES); diff --git a/dbcon/mysql/rulebased_optimizer.h b/dbcon/mysql/rulebased_optimizer.h index ff83b388a..39dc47f6d 100644 --- a/dbcon/mysql/rulebased_optimizer.h +++ b/dbcon/mysql/rulebased_optimizer.h @@ -21,24 +21,29 @@ 
#define PREFER_MY_CONFIG_H #include -#include "idb_mysql.h" +// #include "idb_mysql.h" #include "ha_mcs_impl_if.h" #include "execplan/calpontselectexecutionplan.h" -namespace optimizer { +namespace optimizer +{ -class RBOptimizerContext { -public: +class RBOptimizerContext +{ + public: RBOptimizerContext() = delete; - RBOptimizerContext(cal_impl_if::gp_walk_info& walk_info, THD& thd, bool logRules) : gwi(walk_info), thd(thd), logRules(logRules) {} - // gwi lifetime should be longer than optimizer context. + RBOptimizerContext(cal_impl_if::gp_walk_info& walk_info, THD& thd, bool logRules) + : gwi(walk_info), thd(thd), logRules(logRules) + { + } + // gwi lifetime should be longer than optimizer context. // In plugin runtime this is always true. cal_impl_if::gp_walk_info& gwi; THD& thd; - uint64_t uniqueId {0}; - bool logRules {false}; + uint64_t uniqueId{0}; + bool logRules{false}; }; struct Rule @@ -73,5 +78,6 @@ struct Rule bool walk(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) const; }; -bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, RBOptimizerContext& ctx); -} \ No newline at end of file +bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, RBOptimizerContext& ctx, + bool useUnstableOptimizer); +} // namespace optimizer \ No newline at end of file diff --git a/dbcon/rbo/CMakeLists.txt b/dbcon/rbo/CMakeLists.txt new file mode 100644 index 000000000..3d7136538 --- /dev/null +++ b/dbcon/rbo/CMakeLists.txt @@ -0,0 +1,6 @@ +set(rbo_SRCS ../mysql/rulebased_optimizer.cpp ../mysql/rbo_apply_parallel_ces.cpp ../mysql/rbo_predicate_pushdown.cpp) + +columnstore_library(rbo ${rbo_SRCS}) + +target_include_directories(rbo PUBLIC ${ENGINE_COMMON_INCLUDES}) +columnstore_link(rbo PUBLIC execplan) diff --git a/debian/mariadb-plugin-columnstore.install b/debian/mariadb-plugin-columnstore.install index 0efba1cf8..c56cc164d 100644 --- a/debian/mariadb-plugin-columnstore.install +++ b/debian/mariadb-plugin-columnstore.install @@ -80,6 
+80,7 @@ usr/lib/*/libmarias3.so usr/lib/*/liboamcpp.so usr/lib/*/libquerystats.so usr/lib/*/libquerytele.so +usr/lib/*/librbo.so usr/lib/*/libregr.so usr/lib/*/librowgroup.so usr/lib/*/librwlock.so diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a70782061..6960429f4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -98,6 +98,11 @@ if(WITH_UNITTESTS) columnstore_link(poolallocator ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS} ${GTEST_LIBRARIES}) gtest_add_tests(TARGET poolallocator TEST_PREFIX columnstore:) + add_executable(rbo_tests rbo_hybrid.cpp) + add_dependencies(rbo_tests googletest) + columnstore_link(rbo_tests rbo ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${ENGINE_EXEC_LIBS}) + gtest_add_tests(TARGET rbo_tests TEST_PREFIX columnstore:) + add_executable(stlpoolallocator stlpoolallocator.cpp) target_compile_options(stlpoolallocator PRIVATE -Wno-sign-compare) add_dependencies(stlpoolallocator googletest) diff --git a/tests/rbo_hybrid.cpp b/tests/rbo_hybrid.cpp new file mode 100644 index 000000000..7bc4a599a --- /dev/null +++ b/tests/rbo_hybrid.cpp @@ -0,0 +1,533 @@ +/* + * Unit tests for RBO parallel rewrite functionality using hybrid real/mock approach + * Uses real MariaDB ColumnStore classes where practical, minimal mocks where system infrastructure is + * required + */ + +#include +#include +#include +#include + +#include "../dbcon/mysql/rbo_apply_parallel_ces.h" + +#include "../dbcon/execplan/calpontselectexecutionplan.h" +#include "../dbcon/execplan/simplecolumn.h" +#include "../dbcon/mysql/ha_mcs_impl_if.h" + +class RBOHybridTest : public ::testing::Test +{ + protected: + void SetUp() override + { + } + + void TearDown() override + { + } + + // Mock SimpleColumn that doesn't require Syscat infrastructure + class MockSimpleColumn : public execplan::SimpleColumn + { + public: + MockSimpleColumn(const std::string& schema, const std::string& table, const std::string& column) + : execplan::SimpleColumn() + { + // Set basic properties without 
requiring Syscat + schemaName(schema); + tableName(table); + columnName(column); + // Note: OID and other system-dependent properties are not set + } + }; + + // Helper to create a mock SimpleColumn + boost::shared_ptr createMockSimpleColumn(const std::string& schema, + const std::string& table, + const std::string& column) + { + return boost::shared_ptr(new MockSimpleColumn(schema, table, column)); + } + + // Helper to create a real CalpontSelectExecutionPlan + boost::shared_ptr createCSEP() + { + return boost::shared_ptr( + new execplan::CalpontSelectExecutionPlan()); + } + + // Helper to create a real TableAliasName (this is just a struct, no system dependencies) + execplan::CalpontSystemCatalog::TableAliasName createTableAlias(const std::string& schema, + const std::string& table, + const std::string& alias = "", + bool isColumnStore = true) + { + execplan::CalpontSystemCatalog::TableAliasName tableAlias; + tableAlias.schema = schema; + tableAlias.table = table; + tableAlias.alias = alias.empty() ? table : alias; + tableAlias.view = ""; + tableAlias.fisColumnStore = isColumnStore; + return tableAlias; + } + + // Mock structures needed for RBOptimizerContext + struct MockTHD + { + // Minimal THD mock for testing + uint64_t thread_id = 1; + // Add other fields as needed + }; + + struct MockGatewayInfo + { + std::unordered_map< + cal_impl_if::SchemaAndTableName, + std::map>>, + cal_impl_if::SchemaAndTableNameHash> + tableStatisticsMap; + + // Helper method to find statistics for a table + std::map>>* + findStatisticsForATable(const cal_impl_if::SchemaAndTableName& schemaAndTable) + { + auto it = tableStatisticsMap.find(schemaAndTable); + return (it != tableStatisticsMap.end()) ? 
&(it->second) : nullptr; + } + }; + + // Create a simplified mock approach since RBOptimizerContext is complex to mock properly + // We'll create a wrapper that provides the interface we need for testing + class MockRBOptimizerContextWrapper + { + private: + MockTHD mockTHD; + MockGatewayInfo mockGWI; + + public: + MockRBOptimizerContextWrapper() + { + } + + // Helper to add test statistics + void addTableStatistics(const std::string& schema, const std::string& table, const std::string& column, + Histogram_json_hb* histogram) + { + cal_impl_if::SchemaAndTableName schemaAndTable = {schema, table}; + execplan::SimpleColumn simpleCol; // Mock column + std::vector histograms = {histogram}; + mockGWI.tableStatisticsMap[schemaAndTable][column] = std::make_pair(simpleCol, histograms); + } + + // Get the mock gateway info for testing helper functions + MockGatewayInfo& getGWI() + { + return mockGWI; + } + }; + + // Helper to create a mock optimizer context + std::unique_ptr createMockOptimizerContext() + { + return std::make_unique(); + } + + // Mock histogram for testing (Histogram_json_hb is final, so we can't inherit) + // We'll use a simple wrapper approach instead + struct MockHistogramData + { + std::vector testValues; + + MockHistogramData(const std::vector& values) : testValues(values) + { + } + }; + + boost::shared_ptr createMockHistogram(const std::vector& values) + { + return boost::shared_ptr(new MockHistogramData(values)); + } +}; + +// Test helper functions that work with real data structures +TEST_F(RBOHybridTest, HelperFunctionsWithRealStructures) +{ + // Test someAreForeignTables with real CSEP and real TableAliasName structures + auto csep = createCSEP(); + + // Initially empty, should return false + EXPECT_FALSE(optimizer::details::someAreForeignTables(*csep)); + + // Create table lists using real TableAliasName structures + execplan::CalpontSelectExecutionPlan::TableList tables; + + // Add ColumnStore table (real structure) + auto csTable = 
createTableAlias("test_schema", "cs_table", "", true); + tables.push_back(csTable); + csep->tableList(tables); + + EXPECT_FALSE(optimizer::details::someAreForeignTables(*csep)); + + // Add foreign table (real structure) + auto foreignTable = createTableAlias("test_schema", "foreign_table", "", false); + tables.push_back(foreignTable); + csep->tableList(tables); + + EXPECT_TRUE(optimizer::details::someAreForeignTables(*csep)); +} + +// Test tableIsInUnion with real CSEP and TableAliasName +TEST_F(RBOHybridTest, TableIsInUnionWithRealStructures) +{ + auto csep = createCSEP(); + auto testTable = createTableAlias("test_schema", "test_table"); + + // Test with no unions + EXPECT_FALSE(optimizer::details::tableIsInUnion(testTable, *csep)); + + // Create a union subquery using real CSEP + auto unionPlan = createCSEP(); + execplan::CalpontSelectExecutionPlan::TableList unionTables; + unionTables.push_back(testTable); + unionPlan->tableList(unionTables); + + // Add to union vector + execplan::CalpontSelectExecutionPlan::SelectList unions; + boost::shared_ptr unionPlanBase = unionPlan; + unions.push_back(unionPlanBase); + csep->unionVec(unions); + + // Test with table present in union + EXPECT_TRUE(optimizer::details::tableIsInUnion(testTable, *csep)); + + // Test with table not present in union + auto otherTable = createTableAlias("test_schema", "other_table"); + EXPECT_FALSE(optimizer::details::tableIsInUnion(otherTable, *csep)); +} + +// Test real TableAliasName structure functionality +TEST_F(RBOHybridTest, RealTableAliasNameBasics) +{ + // Test creating and manipulating real TableAliasName structures + auto table1 = createTableAlias("schema1", "table1", "alias1", true); + auto table2 = createTableAlias("schema2", "table2", "", false); + + EXPECT_EQ("schema1", table1.schema); + EXPECT_EQ("table1", table1.table); + EXPECT_EQ("alias1", table1.alias); + EXPECT_TRUE(table1.fisColumnStore); + + EXPECT_EQ("schema2", table2.schema); + EXPECT_EQ("table2", table2.table); + 
EXPECT_EQ("table2", table2.alias); // Should default to table name + EXPECT_FALSE(table2.fisColumnStore); +} + +// Test real CalpontSelectExecutionPlan functionality +TEST_F(RBOHybridTest, RealCSEPBasics) +{ + auto csep = createCSEP(); + + // Test table list operations with real structures + execplan::CalpontSelectExecutionPlan::TableList tables; + auto table1 = createTableAlias("schema1", "table1"); + auto table2 = createTableAlias("schema2", "table2"); + tables.push_back(table1); + tables.push_back(table2); + csep->tableList(tables); + + const auto& retrievedTables = csep->tableList(); + EXPECT_EQ(2u, retrievedTables.size()); + EXPECT_EQ("schema1", retrievedTables[0].schema); + EXPECT_EQ("table1", retrievedTables[0].table); + EXPECT_EQ("schema2", retrievedTables[1].schema); + EXPECT_EQ("table2", retrievedTables[1].table); +} + +// Test with mock SimpleColumn (since real one requires Syscat) +TEST_F(RBOHybridTest, MockSimpleColumnBasics) +{ + auto column = createMockSimpleColumn("test_schema", "test_table", "test_column"); + + EXPECT_EQ("test_schema", column->schemaName()); + EXPECT_EQ("test_table", column->tableName()); + EXPECT_EQ("test_column", column->columnName()); + + // Test that we can modify column properties + column->schemaName("new_schema"); + column->tableName("new_table"); + column->columnName("new_column"); + + EXPECT_EQ("new_schema", column->schemaName()); + EXPECT_EQ("new_table", column->tableName()); + EXPECT_EQ("new_column", column->columnName()); +} + +// Test integration with real CSEP and mock columns +TEST_F(RBOHybridTest, CSEPWithMockColumns) +{ + auto csep = createCSEP(); + + // Test returned columns operations with mock SimpleColumns + execplan::CalpontSelectExecutionPlan::ReturnedColumnList cols; + auto column1 = createMockSimpleColumn("schema1", "table1", "col1"); + auto column2 = createMockSimpleColumn("schema2", "table2", "col2"); + + boost::shared_ptr col1Base = column1; + boost::shared_ptr col2Base = column2; + + 
cols.push_back(col1Base);
+  cols.push_back(col2Base);
+  csep->returnedCols(cols);
+
+  const auto& retrievedCols = csep->returnedCols();
+  EXPECT_EQ(2u, retrievedCols.size());
+
+  // Test casting back to MockSimpleColumn
+  auto mockCol1 = boost::dynamic_pointer_cast<MockSimpleColumn>(retrievedCols[0]);
+  EXPECT_NE(nullptr, mockCol1);
+  if (mockCol1)
+  {
+    EXPECT_EQ("schema1", mockCol1->schemaName());
+    EXPECT_EQ("table1", mockCol1->tableName());
+    EXPECT_EQ("col1", mockCol1->columnName());
+  }
+}
+
+// Test helper functions that can work with mock context.
+// Builds a plan with one ColumnStore table and one foreign table and checks that
+// someAreForeignTables() reports the foreign one.
+TEST_F(RBOHybridTest, HelperFunctionsWithMockContext)
+{
+  auto csep = createCSEP();
+  // The mock context is only constructed here; none of the calls below take it.
+  auto mockCtx = createMockOptimizerContext();
+
+  // Add mixed table types
+  execplan::CalpontSelectExecutionPlan::TableList tables;
+  auto csTable = createTableAlias("test_schema", "cs_table", "", true);
+  auto foreignTable = createTableAlias("test_schema", "foreign_table", "", false);
+  tables.push_back(csTable);
+  tables.push_back(foreignTable);
+  csep->tableList(tables);
+
+  // Test someAreForeignTables (doesn't need context)
+  EXPECT_TRUE(optimizer::details::someAreForeignTables(*csep));
+
+  // Note: Functions that require real RBOptimizerContext would need to be tested
+  // with actual system infrastructure or more sophisticated mocking
+}
+
+// Test edge cases with real structures: an empty plan, an empty table alias and
+// two aliases built from different schema/table names.
+TEST_F(RBOHybridTest, EdgeCasesWithRealStructures)
+{
+  // Test with empty execution plan
+  auto emptyCSEP = createCSEP();
+  EXPECT_FALSE(optimizer::details::someAreForeignTables(*emptyCSEP));
+
+  // Test with empty table alias
+  auto emptyTable = createTableAlias("", "", "");
+  EXPECT_TRUE(emptyTable.schema.empty());
+  EXPECT_TRUE(emptyTable.table.empty());
+
+  // Aliases built from different names; only their schema and table fields are
+  // compared below (TableAliasLessThan itself is not exercised here)
+  auto table1 = createTableAlias("schema1", "table1");
+  auto table2 = createTableAlias("schema2", "table2");
+
+  // These are real structures that can be compared
+  EXPECT_NE(table1.schema, table2.schema);
+  EXPECT_NE(table1.table, table2.table);
+}
+
+// Test parallelCESFilter logic through helper functions (since direct testing requires complex
+// RBOptimizerContext).
+// The same plan object is reused; each step replaces its table list and re-checks
+// someAreForeignTables().
+TEST_F(RBOHybridTest, ParallelCESFilterLogicTesting)
+{
+  auto csep = createCSEP();
+
+  // Test the first condition: someAreForeignTables
+  // Test 1: All ColumnStore tables - should return false
+  execplan::CalpontSelectExecutionPlan::TableList csOnlyTables;
+  auto csTable1 = createTableAlias("test_schema", "cs_table1", "", true);
+  auto csTable2 = createTableAlias("test_schema", "cs_table2", "", true);
+  csOnlyTables.push_back(csTable1);
+  csOnlyTables.push_back(csTable2);
+  csep->tableList(csOnlyTables);
+
+  EXPECT_FALSE(optimizer::details::someAreForeignTables(*csep));
+
+  // Test 2: Mixed tables with foreign tables
+  execplan::CalpontSelectExecutionPlan::TableList mixedTables;
+  auto foreignTable = createTableAlias("test_schema", "foreign_table", "", false);
+  mixedTables.push_back(csTable1);
+  mixedTables.push_back(foreignTable);
+  csep->tableList(mixedTables);
+
+  EXPECT_TRUE(optimizer::details::someAreForeignTables(*csep));
+
+  // Test 3: Only foreign tables
+  execplan::CalpontSelectExecutionPlan::TableList foreignOnlyTables;
+  auto foreignTable1 = createTableAlias("test_schema", "foreign_table1", "", false);
+  auto foreignTable2 = createTableAlias("test_schema", "foreign_table2", "", false);
+  foreignOnlyTables.push_back(foreignTable1);
+  foreignOnlyTables.push_back(foreignTable2);
+  csep->tableList(foreignOnlyTables);
+
+  EXPECT_TRUE(optimizer::details::someAreForeignTables(*csep));
+
+  // Note: Testing the full parallelCESFilter function would require a real RBOptimizerContext
+  // with proper statistics setup, which is not feasible in unit tests without system infrastructure
+}
+
+// Test applyParallelCES prerequisites and data structure setup.
+// Only the plan's tables, returned columns and union vector are inspected;
+// applyParallelCES itself is not called.
+TEST_F(RBOHybridTest, ApplyParallelCESPrerequisites)
+{
+  auto csep = createCSEP();
+
+  // Set up a realistic test scenario that would be suitable for parallel rewrite
+  execplan::CalpontSelectExecutionPlan::TableList tables;
+  auto csTable = createTableAlias("test_schema", "cs_table", "", true);
+  auto foreignTable = createTableAlias("test_schema", "foreign_table", "", false);
+  tables.push_back(csTable);
+  tables.push_back(foreignTable);
+  csep->tableList(tables);
+
+  // Add some mock columns
+  execplan::CalpontSelectExecutionPlan::ReturnedColumnList cols;
+  auto column = createMockSimpleColumn("test_schema", "foreign_table", "id");
+  // Convert the mock column handle to the list's element type before inserting it
+  execplan::CalpontSelectExecutionPlan::ReturnedColumnList::value_type columnBase = column;
+  cols.push_back(columnBase);
+  csep->returnedCols(cols);
+
+  // Verify the setup meets the basic requirements for parallel rewrite consideration.
+  // The two size checks are fatal because the containers are indexed further down.
+  EXPECT_TRUE(optimizer::details::someAreForeignTables(*csep));  // Has foreign tables
+  ASSERT_EQ(2u, csep->tableList().size());                       // Has multiple tables
+  ASSERT_EQ(1u, csep->returnedCols().size());                    // Has columns
+  EXPECT_EQ(0u, csep->unionVec().size());                        // No existing unions
+
+  // Verify table types are correctly identified
+  const auto& retrievedTables = csep->tableList();
+  EXPECT_TRUE(retrievedTables[0].fisColumnStore);   // cs_table
+  EXPECT_FALSE(retrievedTables[1].fisColumnStore);  // foreign_table
+
+  // Verify column can be cast back to SimpleColumn
+  const auto& retrievedCols = csep->returnedCols();
+  auto simpleCol = boost::dynamic_pointer_cast<execplan::SimpleColumn>(retrievedCols[0]);
+  EXPECT_NE(nullptr, simpleCol);
+  if (simpleCol)
+  {
+    EXPECT_EQ("test_schema", simpleCol->schemaName());
+    EXPECT_EQ("foreign_table", simpleCol->tableName());
+    EXPECT_EQ("id", simpleCol->columnName());
+  }
+
+  // Note: Testing the actual applyParallelCES function would require a real RBOptimizerContext
+  // with proper statistics setup, which is not feasible in unit tests without system infrastructure
+}
+
+// Test parallelCESFilter edge cases through helper functions:
+// an empty plan, a plan with only foreign tables, and a plan carrying a union subquery.
+TEST_F(RBOHybridTest, ParallelCESFilterEdgeCases)
+{
+  // Test 1: Empty execution plan
+  auto emptyCSEP = createCSEP();
+  EXPECT_FALSE(optimizer::details::someAreForeignTables(*emptyCSEP));  // Should return false for empty plan
+
+  // Test 2: Only foreign tables (no ColumnStore tables)
+  auto foreignOnlyCSEP = createCSEP();
+  execplan::CalpontSelectExecutionPlan::TableList foreignTables;
+  auto foreignTable1 = createTableAlias("test_schema", "foreign_table1", "", false);
+  auto foreignTable2 = createTableAlias("test_schema", "foreign_table2", "", false);
+  foreignTables.push_back(foreignTable1);
+  foreignTables.push_back(foreignTable2);
+  foreignOnlyCSEP->tableList(foreignTables);
+
+  // Verify that someAreForeignTables returns true
+  EXPECT_TRUE(optimizer::details::someAreForeignTables(*foreignOnlyCSEP));
+
+  // Test 3: Tables with union subqueries
+  auto unionCSEP = createCSEP();
+  auto mainTable = createTableAlias("test_schema", "main_table", "", false);
+  execplan::CalpontSelectExecutionPlan::TableList mainTables;
+  mainTables.push_back(mainTable);
+  unionCSEP->tableList(mainTables);
+
+  // Add union subquery
+  auto unionSubquery = createCSEP();
+  auto unionTable = createTableAlias("test_schema", "union_table", "", true);
+  execplan::CalpontSelectExecutionPlan::TableList unionTables;
+  unionTables.push_back(unionTable);
+  unionSubquery->tableList(unionTables);
+
+  execplan::CalpontSelectExecutionPlan::SelectList unions;
+  // Convert the subquery handle to the union list's element type before inserting it
+  execplan::CalpontSelectExecutionPlan::SelectList::value_type unionBase = unionSubquery;
+  unions.push_back(unionBase);
+  unionCSEP->unionVec(unions);
+
+  // Test tableIsInUnion functionality: the table listed in the union subquery is
+  // expected to be found, the outer plan's own table is not
+  EXPECT_TRUE(optimizer::details::tableIsInUnion(unionTable, *unionCSEP));
+  EXPECT_FALSE(optimizer::details::tableIsInUnion(mainTable, *unionCSEP));
+}
+
+// Test applyParallelCES scenarios through data structure validation.
+// applyParallelCES itself is not called; each scenario only builds a plan and checks
+// what someAreForeignTables() and the plan accessors report.
+TEST_F(RBOHybridTest, ApplyParallelCESScenarios)
+{
+  // Scenario 1: Query that should not be rewritten (all ColumnStore tables)
+  auto csOnlyCSEP = createCSEP();
+  execplan::CalpontSelectExecutionPlan::TableList csTables;
+  auto csTable1 = createTableAlias("test_schema", "cs_table1", "", true);
+  auto csTable2 = createTableAlias("test_schema", "cs_table2", "", true);
+  csTables.push_back(csTable1);
+  csTables.push_back(csTable2);
+  csOnlyCSEP->tableList(csTables);
+
+  // Should not apply parallel rewrite
+  EXPECT_FALSE(optimizer::details::someAreForeignTables(*csOnlyCSEP));
+
+  // Scenario 2: Query with foreign tables but no statistics
+  auto noStatsCSEP = createCSEP();
+  execplan::CalpontSelectExecutionPlan::TableList mixedTables;
+  auto foreignTable = createTableAlias("test_schema", "foreign_table", "", false);
+  mixedTables.push_back(csTable1);
+  mixedTables.push_back(foreignTable);
+  noStatsCSEP->tableList(mixedTables);
+
+  // Has foreign tables; no optimizer context or statistics are set up in this test
+  EXPECT_TRUE(optimizer::details::someAreForeignTables(*noStatsCSEP));
+
+  // Scenario 3: Complex query with multiple foreign tables and columns
+  auto complexCSEP = createCSEP();
+  execplan::CalpontSelectExecutionPlan::TableList complexTables;
+  auto foreignTable1 = createTableAlias("schema1", "foreign_table1", "ft1", false);
+  auto foreignTable2 = createTableAlias("schema2", "foreign_table2", "ft2", false);
+  auto csTable = createTableAlias("schema1", "cs_table", "ct", true);
+  complexTables.push_back(foreignTable1);
+  complexTables.push_back(foreignTable2);
+  complexTables.push_back(csTable);
+  complexCSEP->tableList(complexTables);
+
+  // Add multiple columns
+  execplan::CalpontSelectExecutionPlan::ReturnedColumnList complexCols;
+  auto col1 = createMockSimpleColumn("schema1", "foreign_table1", "id");
+  auto col2 = createMockSimpleColumn("schema1", "foreign_table1", "name");
+  auto col3 = createMockSimpleColumn("schema2", "foreign_table2", "value");
+  auto col4 = createMockSimpleColumn("schema1", "cs_table", "cs_id");
+
+  // Element type of the returned-column list; the mock columns are converted to it
+  using ReturnedColumnPtr = execplan::CalpontSelectExecutionPlan::ReturnedColumnList::value_type;
+  ReturnedColumnPtr col1Base = col1;
+  ReturnedColumnPtr col2Base = col2;
+  ReturnedColumnPtr col3Base = col3;
+  ReturnedColumnPtr col4Base = col4;
+
+  complexCols.push_back(col1Base);
+  complexCols.push_back(col2Base);
+  complexCols.push_back(col3Base);
+  complexCols.push_back(col4Base);
+  complexCSEP->returnedCols(complexCols);
+
+  // Verify the setup. The table count is a fatal check because the list is indexed below.
+  EXPECT_TRUE(optimizer::details::someAreForeignTables(*complexCSEP));
+  ASSERT_EQ(3u, complexCSEP->tableList().size());
+  EXPECT_EQ(4u, complexCSEP->returnedCols().size());
+
+  // Test that we can identify foreign vs ColumnStore tables
+  const auto& retrievedTables = complexCSEP->tableList();
+  EXPECT_FALSE(retrievedTables[0].fisColumnStore);  // foreign_table1
+  EXPECT_FALSE(retrievedTables[1].fisColumnStore);  // foreign_table2
+  EXPECT_TRUE(retrievedTables[2].fisColumnStore);   // cs_table
+}