diff --git a/dbcon/joblist/jlf_common.h b/dbcon/joblist/jlf_common.h index 745706522..5ec3e6d66 100644 --- a/dbcon/joblist/jlf_common.h +++ b/dbcon/joblist/jlf_common.h @@ -199,6 +199,7 @@ struct JobInfo , limitStart(0) , limitCount(-1) , joinNum(0) + , joinNumInView(0) , subLevel(0) , subNum(0) , subId(0) @@ -292,6 +293,8 @@ struct JobInfo // mixed outer join std::map tableSize; int64_t joinNum; + // MCOL-5061, MCOL-334. + int64_t joinNumInView; // for subquery boost::shared_ptr subCount; // # of subqueries in the query statement @@ -367,9 +370,9 @@ struct JobInfo PrimitiveServerThreadPools primitiveServerThreadPools; // Represents a `join edges` and `join id` to be restored in `join order` part. - std::map, uint32_t> joinEdgesToRestore; + std::map, int64_t> joinEdgesToRestore; // Represents a pair of `table` to be on a large side and weight associated with that table. - std::unordered_map tablesForLargeSide; + std::unordered_map tablesForLargeSide; private: // defaults okay diff --git a/dbcon/joblist/jlf_execplantojoblist.cpp b/dbcon/joblist/jlf_execplantojoblist.cpp index 441ee1ea4..64f752807 100644 --- a/dbcon/joblist/jlf_execplantojoblist.cpp +++ b/dbcon/joblist/jlf_execplantojoblist.cpp @@ -1166,7 +1166,9 @@ const JobStepVector doJoin(SimpleColumn* sc1, SimpleColumn* sc2, JobInfo& jobInf // MCOL-334 joins in views need to have higher priority than SEMI/ANTI if (!view1.empty() && view1 == view2) { - thj->joinId(-1); + // MCOL-5061. We could have a filters to be associated with a specific join id, therefore we cannot have + // the same join id for different `TupleHashJoin` steps. + thj->joinId(std::numeric_limits::min() + (++jobInfo.joinNumInView)); } else { diff --git a/dbcon/joblist/jlf_tuplejoblist.cpp b/dbcon/joblist/jlf_tuplejoblist.cpp index 580be6dea..8a176c4ab 100644 --- a/dbcon/joblist/jlf_tuplejoblist.cpp +++ b/dbcon/joblist/jlf_tuplejoblist.cpp @@ -1621,13 +1621,13 @@ class CircularJoinGraphTransformer // For each cycle breaks it and collects join edges. void breakCyclesAndCollectJoinEdges(); // Removes given `join edge` from the `join graph`. - void breakCycleAndCollectJoinEdge(const std::pair& edgeForward); + void breakCycleAndCollectJoinEdge(const std::pair& edgeForward); // Initializes the `join graph` based on the table connections. virtual void initializeJoinGraph(); // Check if the given join edge has FK - FK relations. bool isForeignKeyForeignKeyLink(const JoinEdge& edge, statistics::StatisticsManager* statisticsManager); // Based on column statistics tries to search `join edge` with maximum join cardinality. - virtual void chooseEdgeToTransform(Cycle& cycle, std::pair& resultEdge); + virtual void chooseEdgeToTransform(Cycle& cycle, std::pair& resultEdge); // Removes given `tableId` from adjacent list. void removeFromAdjacentList(uint32_t tableId, std::vector& adjList); // Removes associated join step associated with the given `joinEdge` from job steps. @@ -1675,7 +1675,7 @@ void CircularJoinGraphTransformer::analyzeJoinGraph(uint32_t currentTable, uint3 auto nodeIt = currentTable; auto nextNode = joinGraph[nodeIt].fParent; // Walk back until we find node `adjNode` we identified before. - while (nextNode != UINT_MAX && nextNode != adjNode) + while (nextNode != std::numeric_limits::max() && nextNode != adjNode) { const auto edgeForward = make_pair(nextNode, nodeIt); const auto edgeBackward = make_pair(nodeIt, nextNode); @@ -1691,7 +1691,7 @@ void CircularJoinGraphTransformer::analyzeJoinGraph(uint32_t currentTable, uint3 } // Add the last edge. - if (nextNode != UINT_MAX) + if (nextNode != std::numeric_limits::max()) { const auto edgeForward = make_pair(nextNode, nodeIt); const auto edgeBackward = make_pair(nodeIt, nextNode); @@ -1813,7 +1813,7 @@ bool CircularJoinGraphTransformer::isForeignKeyForeignKeyLink( } void CircularJoinGraphTransformer::chooseEdgeToTransform(Cycle& cycle, - std::pair& resultEdge) + std::pair& resultEdge) { // Use statistics if possible. auto* statisticsManager = statistics::StatisticsManager::instance(); @@ -1902,7 +1902,7 @@ void CircularJoinGraphTransformer::removeAssociatedHashJoinStepFromJoinSteps(con } void CircularJoinGraphTransformer::breakCycleAndCollectJoinEdge( - const std::pair& joinEdgeWithWeight) + const std::pair& joinEdgeWithWeight) { // Add edge to be restored. jobInfo.joinEdgesToRestore.insert({joinEdgeWithWeight.first, joinEdgeWithWeight.second}); @@ -1937,7 +1937,7 @@ void CircularJoinGraphTransformer::breakCyclesAndCollectJoinEdges() // For each cycle. for (auto& cycle : cycles) { - std::pair joinEdgeWithWeight; + std::pair joinEdgeWithWeight; chooseEdgeToTransform(cycle, joinEdgeWithWeight); breakCycleAndCollectJoinEdge(joinEdgeWithWeight); } @@ -1960,7 +1960,7 @@ void CircularJoinGraphTransformer::initializeJoinGraph() void CircularJoinGraphTransformer::transformJoinGraph() { initializeJoinGraph(); - analyzeJoinGraph(/*currentTable=*/headTable, /*prevTable=*/UINT_MAX); + analyzeJoinGraph(/*currentTable=*/headTable, /*prevTable=*/std::numeric_limits::max()); edgesToTransform.clear(); breakCyclesAndCollectJoinEdges(); } @@ -1993,23 +1993,23 @@ class CircularOuterJoinGraphTransformer : public CircularJoinGraphTransformer void analyzeJoinGraph(uint32_t currentTable, uint32_t prevTable) override; // Chooses a join edge to transform from the given cycle based on the join edge weight, // the join edge for transformation has a maximum weight in a cycle. - void chooseEdgeToTransform(Cycle& cycle, std::pair& resultEdge) override; + void chooseEdgeToTransform(Cycle& cycle, std::pair& resultEdge) override; // Returns the min weight among all join weights related to the given `headTable`. - uint32_t getSublingsMinWeight(uint32_t headTable, uint32_t associatedTable); + int64_t getSublingsMinWeight(uint32_t headTable, uint32_t associatedTable); // Returns the max weight which is less than given `upperBoundWeight` among all join weights related to // the given `headTable`. - uint32_t getSublingsMaxWeightLessThan(uint32_t headTable, uint32_t associatedTable, - uint32_t upperBoundWeight); + int64_t getSublingsMaxWeightLessThan(uint32_t headTable, uint32_t associatedTable, + int64_t upperBoundWeight); // Initializes `join graph` from the table connections. void initializeJoinGraph() override; // The map which represents a weight for each join edge in join graph. - std::map joinEdgesToWeights; + std::map joinEdgesToWeights; }; -uint32_t CircularOuterJoinGraphTransformer::getSublingsMinWeight(uint32_t headTable, uint32_t associatedTable) +int64_t CircularOuterJoinGraphTransformer::getSublingsMinWeight(uint32_t headTable, uint32_t associatedTable) { - uint32_t minWeight = UINT_MAX; + int64_t minWeight = std::numeric_limits::max(); for (const auto adjNode : joinGraph[headTable].fAdjacentList) { if (adjNode != associatedTable) @@ -2021,11 +2021,11 @@ uint32_t CircularOuterJoinGraphTransformer::getSublingsMinWeight(uint32_t headTa return minWeight; } -uint32_t CircularOuterJoinGraphTransformer::getSublingsMaxWeightLessThan(uint32_t headTable, - uint32_t associatedTable, - uint32_t upperBoundWeight) +int64_t CircularOuterJoinGraphTransformer::getSublingsMaxWeightLessThan(uint32_t headTable, + uint32_t associatedTable, + int64_t upperBoundWeight) { - uint32_t maxWeight = 0; + int64_t maxWeight = std::numeric_limits::min(); for (const auto adjNode : joinGraph[headTable].fAdjacentList) { if (adjNode != associatedTable) @@ -2048,7 +2048,7 @@ void CircularOuterJoinGraphTransformer::initializeJoinGraph() if (jobInfo.trace) std::cout << "Join edges with weights.\n"; - uint32_t minWeightFullGraph = UINT_MAX; + int64_t minWeightFullGraph = std::numeric_limits::max(); JoinEdge joinEdgeWithMinWeight(0, 0); // For each join step we associate a `join id` with `join edge`. @@ -2057,7 +2057,7 @@ void CircularOuterJoinGraphTransformer::initializeJoinGraph() auto* tupleHashJoinStep = dynamic_cast(joinStepIt->get()); if (tupleHashJoinStep) { - const uint32_t weight = tupleHashJoinStep->joinId(); + const int64_t weight = tupleHashJoinStep->joinId(); const auto tableKey1 = getTableKey(jobInfo, tupleHashJoinStep->tupleId1()); const auto tableKey2 = getTableKey(jobInfo, tupleHashJoinStep->tupleId2()); @@ -2110,7 +2110,7 @@ void CircularOuterJoinGraphTransformer::analyzeJoinGraph(uint32_t currentTable, joinGraph[currentTable].fTableColor = JoinTableColor::GREY; joinGraph[currentTable].fParent = prevTable; - std::vector> adjacentListWeighted; + std::vector> adjacentListWeighted; // For each adjacent node. for (const auto adjNode : joinGraph[currentTable].fAdjacentList) { @@ -2124,7 +2124,7 @@ void CircularOuterJoinGraphTransformer::analyzeJoinGraph(uint32_t currentTable, // Sort vertices by weights. std::sort(adjacentListWeighted.begin(), adjacentListWeighted.end(), - [](const std::pair& a, const std::pair& b) { + [](const std::pair& a, const std::pair& b) { return a.second < b.second; }); @@ -2148,7 +2148,7 @@ void CircularOuterJoinGraphTransformer::analyzeJoinGraph(uint32_t currentTable, auto nodeIt = currentTable; auto nextNode = joinGraph[nodeIt].fParent; // Walk back until we find node `adjNode` we identified before. - while (nextNode != UINT_MAX && nextNode != adjNode) + while (nextNode != std::numeric_limits::max() && nextNode != adjNode) { const auto edgeForward = make_pair(nextNode, nodeIt); const auto edgeBackward = make_pair(nodeIt, nextNode); @@ -2164,7 +2164,7 @@ void CircularOuterJoinGraphTransformer::analyzeJoinGraph(uint32_t currentTable, } // Add the last edge. - if (nextNode != UINT_MAX) + if (nextNode != std::numeric_limits::max()) { const auto edgeForward = make_pair(nextNode, nodeIt); const auto edgeBackward = make_pair(nodeIt, nextNode); @@ -2190,9 +2190,9 @@ void CircularOuterJoinGraphTransformer::analyzeJoinGraph(uint32_t currentTable, } void CircularOuterJoinGraphTransformer::chooseEdgeToTransform(Cycle& cycle, - std::pair& resultEdge) + std::pair& resultEdge) { - uint32_t maxWeightInCycle = 0; + int64_t maxWeightInCycle = std::numeric_limits::min(); JoinEdge joinEdgeWithMaxWeight; if (jobInfo.trace) @@ -2226,6 +2226,10 @@ void CircularOuterJoinGraphTransformer::chooseEdgeToTransform(Cycle& cycle, maxWeightInCycle)) largeSideTable = joinEdgeWithMaxWeight.second; + if (maxWeightInCycle < 0) + maxWeightInCycle = std::numeric_limits::max() + maxWeightInCycle + 1; + idbassert(maxWeightInCycle > 0); + // Add large table to the map for the `join ordering` part. if (!jobInfo.tablesForLargeSide.count(largeSideTable)) jobInfo.tablesForLargeSide.insert({largeSideTable, maxWeightInCycle}); @@ -2758,7 +2762,7 @@ bool matchKeys(const vector& keysToSearch, const vector& key void tryToRestoreJoinEdges(JobInfo& jobInfo, JoinInfo* joinInfo, const RowGroup& largeSideRG, std::vector& smallKeyIndices, std::vector& largeKeyIndices, - std::vector& traces, std::map& joinIndexIdMap, + std::vector& traces, std::map& joinIndexIdMap, uint32_t smallSideIndex) { if (!jobInfo.joinEdgesToRestore.size()) @@ -2771,7 +2775,7 @@ void tryToRestoreJoinEdges(JobInfo& jobInfo, JoinInfo* joinInfo, const RowGroup& std::vector smallKeyIndicesToRestore; std::vector largeKeyIndicesToRestore; - std::vector> takenEdgesWithJoinIDs; + std::vector> takenEdgesWithJoinIDs; auto& joinEdgesToRestore = jobInfo.joinEdgesToRestore; // We could have a multple join edges to restore from the same vertex e.g: @@ -2887,7 +2891,7 @@ void tryToRestoreJoinEdges(JobInfo& jobInfo, JoinInfo* joinInfo, const RowGroup& } void matchEdgesInResultRowGroup(const JobInfo& jobInfo, const RowGroup& rg, - std::map& edgesToRestore, + std::map& edgesToRestore, PostJoinFilterKeys& postJoinFilterKeys) { if (jobInfo.trace) @@ -3069,7 +3073,7 @@ void createPostJoinFilters(const JobInfo& jobInfo, TableInfoMap& tableInfoMap, } SP_JoinInfo joinToLargeTable(uint32_t large, TableInfoMap& tableInfoMap, JobInfo& jobInfo, - vector& joinOrder, std::map& joinEdgesToRestore) + vector& joinOrder, std::map& joinEdgesToRestore) { vector smallSides; tableInfoMap[large].fVisited = true; @@ -3578,7 +3582,7 @@ struct JoinOrderData { uint32_t fTid1; uint32_t fTid2; - uint32_t fJoinId; + int64_t fJoinId; }; void getJoinOrder(vector& joins, JobStepVector& joinSteps, JobInfo& jobInfo) @@ -3642,7 +3646,7 @@ void joinTablesInOrder(uint32_t largest, JobStepVector& joinSteps, TableInfoMap& // 0 - prefer to be on small side, like FROM subquery; // 1 - can be on either large or small side; // 2 - must be on large side. - map> joinStepMap; + map> joinStepMap; BatchPrimitive* bps = NULL; SubAdapterStep* tsas = NULL; TupleHashJoinStep* thjs = NULL; @@ -3715,7 +3719,7 @@ void joinTablesInOrder(uint32_t largest, JobStepVector& joinSteps, TableInfoMap& } // sort the join steps based on join ID. - vector joins; + std::vector joins; getJoinOrder(joins, joinSteps, jobInfo); // join the steps @@ -3775,6 +3779,7 @@ void joinTablesInOrder(uint32_t largest, JobStepVector& joinSteps, TableInfoMap& // merge in the next step if the large side is the same for (uint64_t ns = js + 1; ns < joins.size(); js++, ns++) { + // Check if FE needs table in previous smallsides. uint32_t tid1 = joins[ns].fTid1; uint32_t tid2 = joins[ns].fTid2; uint32_t small = (uint32_t)-1; @@ -3901,8 +3906,8 @@ void joinTablesInOrder(uint32_t largest, JobStepVector& joinSteps, TableInfoMap& } uint32_t smallSideIndex = 0; - std::map joinIdIndexMap; - + // Join id to table id. + std::map joinIdIndexMap; for (vector::iterator i = smallSides.begin(); i != smallSides.end(); i++, smallSideIndex++) { JoinInfo* info = i->get(); @@ -4052,7 +4057,7 @@ void joinTablesInOrder(uint32_t largest, JobStepVector& joinSteps, TableInfoMap& // The map for in clause filter. for (size_t i = 0; i < smallSides.size(); i++) { - if (smallSides[i]->fJoinData.fJoinId > 0) + if (smallSides[i]->fJoinData.fJoinId != 0) joinIdIndexMap[smallSides[i]->fJoinData.fJoinId] = i; } @@ -4119,10 +4124,10 @@ void joinTablesInOrder(uint32_t largest, JobStepVector& joinSteps, TableInfoMap& keyToIndexMap.insert(make_pair(rowGroupKeys[i], i)); // tables have additional comparisons - map correlateTables; // index in thjs + map correlateTables; // index in thjs map correlateCompare; // expression - for (size_t i = 0; i != smallSides.size(); i++) + for (uint32_t i = 0; i != smallSides.size(); i++) { if ((jointypes[i] & SEMI) || (jointypes[i] & ANTI) || (jointypes[i] & SCALAR)) { @@ -4150,9 +4155,9 @@ void joinTablesInOrder(uint32_t largest, JobStepVector& joinSteps, TableInfoMap& } const vector& tables = e->tableKeys(); - map::iterator j = correlateTables.end(); + auto j = correlateTables.end(); - for (size_t i = 0; i < tables.size(); i++) + for (uint32_t i = 0; i < tables.size(); i++) { j = correlateTables.find(tables[i]); @@ -4191,7 +4196,7 @@ void joinTablesInOrder(uint32_t largest, JobStepVector& joinSteps, TableInfoMap& eit = readyExpSteps.erase(eit); } - map::iterator k = correlateTables.begin(); + auto k = correlateTables.begin(); while (k != correlateTables.end()) { diff --git a/mysql-test/columnstore/basic/r/mcs63_crossengine_views.result b/mysql-test/columnstore/basic/r/mcs63_crossengine_views.result index fcb1a4bdc..6491a30b9 100644 --- a/mysql-test/columnstore/basic/r/mcs63_crossengine_views.result +++ b/mysql-test/columnstore/basic/r/mcs63_crossengine_views.result @@ -63,13 +63,13 @@ select `mcs63_db`.`t1`.`t1_int` AS `t1_int`,`mcs63_db`.`t1`.`t1_char` AS `t1_cha SELECT * FROM v4; t1_int t1_char t2_int t2_char 1 aaa 1 hhh -3 ccc 3 iii -5 eee 5 jjj -7 gggg 7 llll NULL NULL NULL NULL 2 bbb NULL NULL +3 ccc NULL NULL 4 ddd NULL NULL +5 eee NULL NULL 6 fff NULL NULL +7 gggg NULL NULL CREATE VIEW v5 AS SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_int = t2.t2_int; SELECT VIEW_DEFINITION FROM INFORMATION_SCHEMA.VIEWS WHERE TABLE_NAME='v5' AND TABLE_SCHEMA='mcs63_db'; VIEW_DEFINITION @@ -105,10 +105,10 @@ select `mcs63_db`.`t1`.`t1_int` AS `t1_int`,`mcs63_db`.`t1`.`t1_char` AS `t1_cha SELECT * FROM v7; t1_int t1_char t2_int t2_char NULL NULL NULL NULL -1 aaa 1 hhh -3 ccc 3 iii +NULL NULL 1 hhh +NULL NULL 3 iii 5 eee 5 jjj -7 gggg 7 llll +NULL NULL 7 llll NULL NULL 9 kkkk NULL NULL 11 mm NULL NULL 13 n diff --git a/mysql-test/columnstore/bugfixes/mcol-5061.result b/mysql-test/columnstore/bugfixes/mcol-5061.result new file mode 100644 index 000000000..4453c59df --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5061.result @@ -0,0 +1,20 @@ +DROP DATABASE IF EXISTS mcol_5061; +CREATE DATABASE mcol_5061; +USE mcol_5061; +create table t1 (a int, b int) engine=columnstore; +create table t2 (a int, b int) engine=columnstore; +insert into t1 values (1, 3), (2, 3), (3, 4); +insert into t2 values (1, 2), (2, 4), (4, 5); +select t1.a as a, t1.b as b, t2.a as c, t2.b as d, t2_1.a as e, t2_1.b as f from t1 left join t2 on (t1.a = t2.a and t2.a > 1) left join t2 as t2_1 on (t1.b = t2_1.b and t2_1.a > 1); +a b c d e f +1 3 NULL NULL NULL NULL +2 3 2 4 NULL NULL +3 4 NULL NULL 2 4 +create or replace view view_test as +select t1.a as a, t1.b as b, t2.a as c, t2.b as d, t2_1.a as e, t2_1.b as f from t1 left join t2 on (t1.a = t2.a and t2.a > 1) left join t2 as t2_1 on (t1.b = t2_1.b and t2_1.a > 1); +select * from view_test; +a b c d e f +1 3 NULL NULL NULL NULL +2 3 2 4 NULL NULL +3 4 NULL NULL 2 4 +DROP DATABASE mcol_5061; diff --git a/mysql-test/columnstore/bugfixes/mcol-5061.test b/mysql-test/columnstore/bugfixes/mcol-5061.test new file mode 100644 index 000000000..c4c0e0e6b --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5061.test @@ -0,0 +1,26 @@ +# +# Test based on Jira MCOL-5061 +# Reduced customer test case. +# + +--source ../include/have_columnstore.inc + +--disable_warnings +DROP DATABASE IF EXISTS mcol_5061; +--enable_warnings + +CREATE DATABASE mcol_5061; +USE mcol_5061; + +create table t1 (a int, b int) engine=columnstore; +create table t2 (a int, b int) engine=columnstore; + +insert into t1 values (1, 3), (2, 3), (3, 4); +insert into t2 values (1, 2), (2, 4), (4, 5); +select t1.a as a, t1.b as b, t2.a as c, t2.b as d, t2_1.a as e, t2_1.b as f from t1 left join t2 on (t1.a = t2.a and t2.a > 1) left join t2 as t2_1 on (t1.b = t2_1.b and t2_1.a > 1); + +create or replace view view_test as +select t1.a as a, t1.b as b, t2.a as c, t2.b as d, t2_1.a as e, t2_1.b as f from t1 left join t2 on (t1.a = t2.a and t2.a > 1) left join t2 as t2_1 on (t1.b = t2_1.b and t2_1.a > 1); +select * from view_test; + +DROP DATABASE mcol_5061;