From be5711cf0debaa44debf6224e82295e065a07cb9 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Tue, 4 Mar 2025 21:10:39 +0000 Subject: [PATCH] feat(): replace getMaxDataSize with getMaxDataSizeWithStrings to accurately account for mem --- dbcon/joblist/diskjoinstep.cpp | 16 +++++++++------- dbcon/joblist/tuple-bps.cpp | 5 +++-- utils/rowgroup/rowstorage.cpp | 2 +- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/dbcon/joblist/diskjoinstep.cpp b/dbcon/joblist/diskjoinstep.cpp index 86e6fa886..3bcd6945f 100644 --- a/dbcon/joblist/diskjoinstep.cpp +++ b/dbcon/joblist/diskjoinstep.cpp @@ -519,16 +519,18 @@ void DiskJoinStep::joinFcn(const uint32_t threadID) // cout << "inserting a full RG" << endl; if (thjs) { - // FIXME: Possible false positive. Something wrong with this calculation, just put a warning - // until fixed. - if (!thjs->getMemory(l_outputRG.getMaxDataSize())) + if (!thjs->getMemory(l_outputRG.getMaxDataSizeWithStrings())) { // FIXME: This is also looks wrong. // calculate guess of size required for error message - uint64_t memReqd = (unmatched.size() * outputRG.getDataSize(1)) / 1048576; - uint64_t memLimit = thjs->resourceManager->getConfiguredUMMemLimit() / 1048576; - std::cerr << "DiskJoin::joinFcn() possible OOM for the join result, mem required: " - << memReqd << " mem limit: " << memLimit << std::endl; + uint64_t memReqd = (l_outputRG.getMaxDataSizeWithStrings()) / 1048576; + Message::Args args; + args.add(memReqd); + args.add(thjs->resourceManager->getConfiguredUMMemLimit() / 1048576); + std::cerr << logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_JOIN_RESULT_TOO_BIG, + args) + << " @" << __FILE__ << ":" << __LINE__; + throw logging::IDBExcept(logging::ERR_JOIN_RESULT_TOO_BIG, args); } } diff --git a/dbcon/joblist/tuple-bps.cpp b/dbcon/joblist/tuple-bps.cpp index 5696a1376..e12f250d3 100644 --- a/dbcon/joblist/tuple-bps.cpp +++ b/dbcon/joblist/tuple-bps.cpp @@ -275,9 +275,10 @@ uint64_t TupleBPS::JoinLocalData::generateJoinResultSet(const uint32_t depth, uint64_t baseRid = local_outputRG.getBaseRid(); outputData.push_back(joinedData); // Don't let the join results buffer get out of control. - if (tbps->resourceManager()->getMemory(local_outputRG.getMaxDataSize(), false)) + auto outputDataSize = local_outputRG.getMaxDataSizeWithStrings(); + if (tbps->resourceManager()->getMemory(outputDataSize, false)) { - memSizeForOutputRG += local_outputRG.getMaxDataSize(); + memSizeForOutputRG += outputDataSize; } else { diff --git a/utils/rowgroup/rowstorage.cpp b/utils/rowgroup/rowstorage.cpp index ec5699ada..bec3faa65 100644 --- a/utils/rowgroup/rowstorage.cpp +++ b/utils/rowgroup/rowstorage.cpp @@ -1361,7 +1361,7 @@ class RowGroupStorage { messageqcpp::ByteStream bs; fRowGroupOut->setData(rgdata); - rgdata->serialize(bs, fRowGroupOut->getDataSize()); + rgdata->serialize(bs, fRowGroupOut->getSizeWithStrings()); int errNo; if ((errNo = fDumper->write(makeRGFilename(rgid), (char*)bs.buf(), bs.length())) != 0)