Mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
MCOL-5477 Disk join step improvement.
This patch:

1. Handles the corner case where a bucket exceeds the memory limit but its data cannot be redistributed into new buckets by the hash algorithm, because the rows all have the same values (illustrated in the sketch below).
2. Adds a force option for the disk join step (see the sketch after the diff).
3. Adds an option to control the depth of the partition tree.
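For context on items 1 and 3: recursive hash repartitioning cannot shrink a bucket whose rows all carry one value, because every row hashes back into the same child bucket, and without a stop condition the partition tree would keep growing. The following is a minimal, self-contained C++ sketch of that behaviour, not ColumnStore's implementation; the names (Row, partitionRows, kBucketRowLimit, kMaxTreeDepth) and the simplified row type are hypothetical.

#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

// Toy stand-in for a row; the real engine works on RowGroups, not bare keys.
using Row = uint64_t;  // the join-key value itself, for simplicity

constexpr size_t kBucketRowLimit = 4;  // pretend per-bucket "memory limit"
constexpr uint32_t kMaxTreeDepth = 8;  // analogue of the new depth option

// Returns true if the bucket cannot be split further: every row hashes to the
// same child because all join keys are identical.
bool allKeysEqual(const std::vector<Row>& rows)
{
  for (const Row& r : rows)
    if (r != rows.front())
      return false;
  return true;
}

// Recursively split an oversized bucket into children by hashing the key with
// a depth-dependent seed.  Stops when the bucket fits, when the keys are all
// equal (item 1 of the commit message), or when the depth cap is hit (item 3);
// the real step would process or spill such buckets instead of splitting.
void partitionRows(const std::vector<Row>& rows, uint32_t depth)
{
  if (rows.size() <= kBucketRowLimit)
  {
    std::cout << "leaf bucket, " << rows.size() << " rows at depth " << depth << "\n";
    return;
  }
  if (allKeysEqual(rows) || depth >= kMaxTreeDepth)
  {
    std::cout << "cannot split further (same keys or depth cap), "
              << rows.size() << " rows at depth " << depth << "\n";
    return;
  }

  constexpr size_t fanout = 4;
  std::vector<std::vector<Row>> children(fanout);
  for (const Row& r : rows)
    children[std::hash<uint64_t>{}(r ^ (0x9e3779b97f4a7c15ULL * (depth + 1))) % fanout].push_back(r);

  for (const auto& child : children)
    partitionRows(child, depth + 1);
}

int main()
{
  // 20 rows that all share one join-key value: hashing can never spread them.
  std::vector<Row> sameKey(20, 42);
  partitionRows(sameKey, 0);
  return 0;
}

Running it on 20 identical keys reports at depth 0 that the bucket cannot be split further, which is the situation items 1 and 3 guard against: either detect the same-value bucket directly or stop once the configured tree depth is reached.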
@@ -106,6 +106,8 @@ TupleHashJoinStep::TupleHashJoinStep(const JobInfo& jobInfo)
   djsSmallLimit = jobInfo.smallSideLimit;
   djsLargeLimit = jobInfo.largeSideLimit;
   djsPartitionSize = jobInfo.partitionSize;
+  djsMaxPartitionTreeDepth = jobInfo.djsMaxPartitionTreeDepth;
+  djsForceRun = jobInfo.djsForceRun;
   isDML = jobInfo.isDML;

   config::Config* config = config::Config::makeConfig();
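The hunk above only shows the new assignments; the matching member declarations live elsewhere in the patch. As orientation, a compilable sketch of what the two new knobs represent (the struct, types and defaults are assumptions, only the member names come from the diff):

#include <cstdint>

// Hypothetical sketch of the two new disk-join-step (DJS) knobs from MCOL-5477.
// In ColumnStore they are members of TupleHashJoinStep populated from JobInfo;
// the types and defaults here are illustrative assumptions.
struct DiskJoinKnobs
{
  uint32_t djsMaxPartitionTreeDepth = 8;  // cap on how deep bucket repartitioning may recurse
  bool djsForceRun = false;               // when true, send every join to the disk-based path
};

int main()
{
  DiskJoinKnobs knobs;
  return knobs.djsForceRun ? 1 : 0;  // trivial use so the sketch compiles and runs
}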
@@ -1971,53 +1973,55 @@ void TupleHashJoinStep::segregateJoiners()
     return;
   }

-  /* If they are all inner joins they can be segregated w/o respect to
-  ordering; if they're not, the ordering has to stay consistent therefore
-  the first joiner that isn't finished and everything after has to be
-  done by DJS. */
-
-  if (allInnerJoins)
+  // Force all joins into disk based.
+  if (djsForceRun)
   {
-    for (i = 0; i < smallSideCount; i++)
+    for (i = 0; i < smallSideCount; ++i)
     {
-      // if (joiners[i]->isFinished() && (rand() % 2)) { // for debugging
-      if (joiners[i]->isFinished())
-      {
-        // cout << "1joiner " << i << " " << hex << (uint64_t) joiners[i].get() << dec << " -> TBPS" << endl;
-        tbpsJoiners.push_back(joiners[i]);
-      }
-      else
-      {
-        joinIsTooBig = true;
-        joiners[i]->setConvertToDiskJoin();
-        // cout << "1joiner " << i << " " << hex << (uint64_t) joiners[i].get() << dec << " -> DJS" << endl;
-        djsJoiners.push_back(joiners[i]);
-        djsJoinerMap.push_back(i);
-      }
+      joinIsTooBig = true;
+      joiners[i]->setConvertToDiskJoin();
+      djsJoiners.push_back(joiners[i]);
+      djsJoinerMap.push_back(i);
     }
   }
   else
   {
-    // uint limit = rand() % smallSideCount;
-    for (i = 0; i < smallSideCount; i++)
+    /* If they are all inner joins they can be segregated w/o respect to
+    ordering; if they're not, the ordering has to stay consistent therefore
+    the first joiner that isn't finished and everything after has to be
+    done by DJS. */
+    if (allInnerJoins)
     {
-      // if (joiners[i]->isFinished() && i < limit) { // debugging
-      if (joiners[i]->isFinished())
+      for (i = 0; i < smallSideCount; i++)
       {
-        // cout << "2joiner " << i << " " << hex << (uint64_t) joiners[i].get() << dec << " -> TBPS" << endl;
-        tbpsJoiners.push_back(joiners[i]);
+        if (joiners[i]->isFinished())
+          tbpsJoiners.push_back(joiners[i]);
+        else
+        {
+          joinIsTooBig = true;
+          joiners[i]->setConvertToDiskJoin();
+          djsJoiners.push_back(joiners[i]);
+          djsJoinerMap.push_back(i);
+        }
       }
-      else
-        break;
     }
-
-    for (; i < smallSideCount; i++)
+    else
     {
-      joinIsTooBig = true;
-      joiners[i]->setConvertToDiskJoin();
-      // cout << "2joiner " << i << " " << hex << (uint64_t) joiners[i].get() << dec << " -> DJS" << endl;
-      djsJoiners.push_back(joiners[i]);
-      djsJoinerMap.push_back(i);
+      for (i = 0; i < smallSideCount; i++)
+      {
+        if (joiners[i]->isFinished())
+          tbpsJoiners.push_back(joiners[i]);
+        else
+          break;
+      }
+
+      for (; i < smallSideCount; i++)
+      {
+        joinIsTooBig = true;
+        joiners[i]->setConvertToDiskJoin();
+        djsJoiners.push_back(joiners[i]);
+        djsJoinerMap.push_back(i);
+      }
     }
   }
 }
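Read as a policy, the restructured segregateJoiners() does one of three things: with the new force option every joiner goes to the disk join step; otherwise, for all-inner-join queries each unfinished joiner is moved to the disk path individually; otherwise ordering must stay consistent, so the first unfinished joiner and everything after it go to the disk path. A standalone sketch of that routing decision follows; it is illustrative only, and JoinerState, Routing and segregate() are hypothetical stand-ins for ColumnStore's joiner objects.

#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical stand-in for a small-side joiner: true = fit in memory ("finished").
using JoinerState = bool;

struct Routing
{
  std::vector<size_t> tbps;  // joiners kept on the in-memory (TBPS) path
  std::vector<size_t> djs;   // joiners converted to the disk join step (DJS)
};

// Mirrors the branching added in MCOL-5477, simplified to indices instead of joiner objects.
Routing segregate(const std::vector<JoinerState>& finished, bool allInnerJoins, bool djsForceRun)
{
  Routing r;
  const size_t n = finished.size();

  if (djsForceRun)
  {
    // New force option: everything goes to the disk-based path.
    for (size_t i = 0; i < n; ++i)
      r.djs.push_back(i);
  }
  else if (allInnerJoins)
  {
    // Inner joins can be segregated independently of ordering.
    for (size_t i = 0; i < n; ++i)
      (finished[i] ? r.tbps : r.djs).push_back(i);
  }
  else
  {
    // Ordering must stay consistent: the first unfinished joiner and
    // everything after it go to DJS.
    size_t i = 0;
    for (; i < n && finished[i]; ++i)
      r.tbps.push_back(i);
    for (; i < n; ++i)
      r.djs.push_back(i);
  }
  return r;
}

int main()
{
  std::vector<JoinerState> finished = {true, false, true};
  Routing r = segregate(finished, /*allInnerJoins=*/false, /*djsForceRun=*/false);
  std::cout << "TBPS: " << r.tbps.size() << ", DJS: " << r.djs.size() << "\n";  // TBPS: 1, DJS: 2
  return 0;
}

With djsForceRun left unset the routing matches the pre-patch behaviour; the flag only adds the first branch.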