1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-5477 Disk join step improvement.

This patch:
1. Handles the corner case where a bucket exceeds the memory limit but its data cannot be redistributed into new buckets by the hash algorithm, because the rows have the same values.
2. Adds a force option for the disk join step.
3. Adds an option to control the depth of the partition tree.
This commit is contained in:
Denis Khalikov
2023-05-15 16:44:06 +03:00
parent 375d162376
commit 1f190a6e75
13 changed files with 309 additions and 165 deletions

View File

@ -106,6 +106,8 @@ TupleHashJoinStep::TupleHashJoinStep(const JobInfo& jobInfo)
djsSmallLimit = jobInfo.smallSideLimit;
djsLargeLimit = jobInfo.largeSideLimit;
djsPartitionSize = jobInfo.partitionSize;
djsMaxPartitionTreeDepth = jobInfo.djsMaxPartitionTreeDepth;
djsForceRun = jobInfo.djsForceRun;
isDML = jobInfo.isDML;
config::Config* config = config::Config::makeConfig();
@ -1971,53 +1973,55 @@ void TupleHashJoinStep::segregateJoiners()
return;
}
/* If they are all inner joins they can be segregated w/o respect to
ordering; if they're not, the ordering has to stay consistent therefore
the first joiner that isn't finished and everything after has to be
done by DJS. */
if (allInnerJoins)
// Force all joins into disk based.
if (djsForceRun)
{
for (i = 0; i < smallSideCount; i++)
for (i = 0; i < smallSideCount; ++i)
{
// if (joiners[i]->isFinished() && (rand() % 2)) { // for debugging
if (joiners[i]->isFinished())
{
// cout << "1joiner " << i << " " << hex << (uint64_t) joiners[i].get() << dec << " -> TBPS" << endl;
tbpsJoiners.push_back(joiners[i]);
}
else
{
joinIsTooBig = true;
joiners[i]->setConvertToDiskJoin();
// cout << "1joiner " << i << " " << hex << (uint64_t) joiners[i].get() << dec << " -> DJS" << endl;
djsJoiners.push_back(joiners[i]);
djsJoinerMap.push_back(i);
}
joinIsTooBig = true;
joiners[i]->setConvertToDiskJoin();
djsJoiners.push_back(joiners[i]);
djsJoinerMap.push_back(i);
}
}
else
{
// uint limit = rand() % smallSideCount;
for (i = 0; i < smallSideCount; i++)
/* If they are all inner joins they can be segregated w/o respect to
ordering; if they're not, the ordering has to stay consistent therefore
the first joiner that isn't finished and everything after has to be
done by DJS. */
if (allInnerJoins)
{
// if (joiners[i]->isFinished() && i < limit) { // debugging
if (joiners[i]->isFinished())
for (i = 0; i < smallSideCount; i++)
{
// cout << "2joiner " << i << " " << hex << (uint64_t) joiners[i].get() << dec << " -> TBPS" << endl;
tbpsJoiners.push_back(joiners[i]);
if (joiners[i]->isFinished())
tbpsJoiners.push_back(joiners[i]);
else
{
joinIsTooBig = true;
joiners[i]->setConvertToDiskJoin();
djsJoiners.push_back(joiners[i]);
djsJoinerMap.push_back(i);
}
}
else
break;
}
for (; i < smallSideCount; i++)
else
{
joinIsTooBig = true;
joiners[i]->setConvertToDiskJoin();
// cout << "2joiner " << i << " " << hex << (uint64_t) joiners[i].get() << dec << " -> DJS" << endl;
djsJoiners.push_back(joiners[i]);
djsJoinerMap.push_back(i);
for (i = 0; i < smallSideCount; i++)
{
if (joiners[i]->isFinished())
tbpsJoiners.push_back(joiners[i]);
else
break;
}
for (; i < smallSideCount; i++)
{
joinIsTooBig = true;
joiners[i]->setConvertToDiskJoin();
djsJoiners.push_back(joiners[i]);
djsJoinerMap.push_back(i);
}
}
}
}