1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-5539 Put table on small side if it was involved in prev.join. (#2945)

This commit is contained in:
Denis Khalikov
2023-09-05 12:19:43 +03:00
committed by GitHub
parent d586975da7
commit add3a57e8d
3 changed files with 34 additions and 24 deletions

View File

@ -705,7 +705,8 @@ void addProjectStepsToBps(TableInfoMap::iterator& mit, BatchPrimitive* bps, JobI
{
// if (jobInfo.trace && bps->tableOid() >= 3000)
// cout << "1 setting project BPP for " << tbps->toString() << " with "
//<< it->get()->toString() << " and " << (it+1)->get()->toString() << endl;
//<< it->get()->toString() << " and " << (it+1)->get()->toString()
//<< endl;
bps->setProjectBPP(it->get(), (it + 1)->get());
// this is a two-step project step, remove the token step from id vector
@ -1876,7 +1877,6 @@ void CircularJoinGraphTransformer::removeAssociatedHashJoinStepFromJoinSteps(con
if ((tableKey1 == joinEdge.first && tableKey2 == joinEdge.second) ||
(tableKey1 == joinEdge.second && tableKey2 == joinEdge.first))
{
if (jobInfo.trace)
std::cout << "Erase matched join step with keys: " << tableKey1 << " <-> " << tableKey2
<< std::endl;
@ -2130,9 +2130,8 @@ void CircularOuterJoinGraphTransformer::analyzeJoinGraph(uint32_t currentTable,
// Sort vertices by weights.
std::sort(adjacentListWeighted.begin(), adjacentListWeighted.end(),
[](const std::pair<uint32_t, int64_t>& a, const std::pair<uint32_t, int64_t>& b) {
return a.second < b.second;
});
[](const std::pair<uint32_t, int64_t>& a, const std::pair<uint32_t, int64_t>& b)
{ return a.second < b.second; });
// For each weighted adjacent node.
for (const auto& adjNodeWeighted : adjacentListWeighted)
@ -3796,10 +3795,21 @@ void joinTablesInOrder(uint32_t largest, JobStepVector& joinSteps, TableInfoMap&
small = tid1;
}
// MCOL-5539. If the current large table involved in the previous join and it was not merged into
// "single large side multiple small sides" optimization, it should be on a small side,
// because it represents a intermediate join result and its rowgroup could be a combination of multiple
// rowgroups, therefore it should be sent to BPP as a small side, we cannot read it from disk.
if (find(joinedTable.begin(), joinedTable.end(), large) != joinedTable.end() &&
joinStepMap[small].second > 0)
{
std::swap(large, small);
}
updateJoinSides(small, large, joinInfoMap, smallSides, tableInfoMap, jobInfo);
// This is a table for multiple join edges, always a stream table.
if (joinStepMap[large].second > 2)
// If `largest` table is equal to the current `large` table - it's an umstream table.
if (joinStepMap[large].second > 2 || large == largest)
umstream = true;
if (find(joinedTable.begin(), joinedTable.end(), small) == joinedTable.end())