From d1de121476c00a88a27cb83999d2e5be087e263d Mon Sep 17 00:00:00 2001 From: drrtuy Date: Tue, 25 Mar 2025 17:50:35 +0000 Subject: [PATCH] fix(): periodically return from the UM JOIN loop, which can produce many RGData objects and thus overcommit RAM --- dbcon/joblist/tuplehashjoin.cpp | 5 ++++- dbcon/joblist/tuplehashjoin.h | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/dbcon/joblist/tuplehashjoin.cpp b/dbcon/joblist/tuplehashjoin.cpp index cefddc8c7..9e7b11a30 100644 --- a/dbcon/joblist/tuplehashjoin.cpp +++ b/dbcon/joblist/tuplehashjoin.cpp @@ -1828,7 +1828,10 @@ void TupleHashJoinStep::generateJoinResultSet(const vector } else { + // NB In case of OUTER JOIN this loop can produce a lot of RGData objects, + // so the accumulated output must be flushed periodically from within this loop. l_outputRG.getRow(l_outputRG.getRowCount(), &joinedRow); + auto flushThreshold = outputDL->maxElements(); for (i = 0; i < joinerOutput[depth].size(); i++, joinedRow.nextRow(), l_outputRG.incRowCount()) { @@ -1840,7 +1843,7 @@ void TupleHashJoinStep::generateJoinResultSet(const vector uint64_t baseRid = l_outputRG.getBaseRid(); outputData.push_back(rgData); // Count the memory - if (UNLIKELY(!getMemory(l_outputRG.getSizeWithStrings()))) + if (UNLIKELY(outputData.size() > flushThreshold || !getMemory(l_outputRG.getSizeWithStrings()))) { // MCOL-5512 if (fe2) diff --git a/dbcon/joblist/tuplehashjoin.h b/dbcon/joblist/tuplehashjoin.h index f65391e25..e588b8a39 100644 --- a/dbcon/joblist/tuplehashjoin.h +++ b/dbcon/joblist/tuplehashjoin.h @@ -529,6 +529,8 @@ class TupleHashJoinStep : public JobStep, public TupleDeliveryStep } void operator()() { + std::string name = "HJSJoinRun" + std::to_string(index); + utils::setThreadName(name.c_str()); HJ->joinRunnerFcn(index); } TupleHashJoinStep* HJ;