1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-01 06:46:55 +03:00

Fix/mcol 5787 rgdata buffer max size dev (#3325)

* fix(rowgroup): RGData now uses a uint64_t counter for the fixed-size columns data buffer.
	The buffer can use > 4GB of RAM, which is necessary for PM-side joins.
	The RGData ctor uses uint32_t when allocating the data buffer.
 	This causes an implicit heap overflow.

* feat(bytestream,serdes): BS buffer size type is uint64_t
	This is necessary to handle 64-bit RGData, which comes as
	a separate patch. The pair of patches allows
	PM joins when the SmallSide size is > 4GB.

* feat(bytestream,serdes): Distribute BS buf size data type change to avoid implicit data type narrowing

* feat(rowgroup): this restores the bits lost during cherry-pick. The lost bits caused the first RGData::serialize to crash the process
This commit is contained in:
drrtuy
2024-11-09 19:44:02 +00:00
committed by GitHub
parent 842a3c8a40
commit 8ae5a3da40
28 changed files with 1130 additions and 231 deletions

View File

@ -72,7 +72,6 @@ BatchPrimitiveProcessorJL::BatchPrimitiveProcessorJL(const ResourceManager* rm)
, LBIDTrace(false)
, tupleLength(0)
, status(0)
, sendRowGroups(false)
, valueColumn(0)
, sendTupleJoinRowGroupData(false)
, bop(BOP_AND)
@ -147,7 +146,7 @@ void BatchPrimitiveProcessorJL::addFilterStep(const pDictionaryStep& step)
tableOID = step.tableOid();
if (filterCount == 0 && !sendRowGroups)
if (filterCount == 0)
{
sendAbsRids = true;
sendValues = true;
@ -244,7 +243,7 @@ void BatchPrimitiveProcessorJL::addProjectStep(const PassThruStep& step)
if (utils::isWide(cc->getWidth()))
wideColumnsWidths |= cc->getWidth();
if (filterCount == 0 && !sendRowGroups)
if (filterCount == 0)
sendValues = true;
idbassert(sessionID == step.sessionId());
@ -283,7 +282,7 @@ void BatchPrimitiveProcessorJL::addProjectStep(const PassThruStep& p, const pDic
projectCount++;
needStrValues = true;
if (filterCount == 0 && !sendRowGroups)
if (filterCount == 0)
{
sendValues = true;
sendAbsRids = true;
@ -1054,9 +1053,6 @@ void BatchPrimitiveProcessorJL::createBPP(ByteStream& bs) const
if (tJoiners.size() > 0)
flags |= HAS_JOINER;
if (sendRowGroups)
flags |= HAS_ROWGROUP;
if (sendTupleJoinRowGroupData)
flags |= JOIN_ROWGROUP_DATA;
@ -1071,12 +1067,6 @@ void BatchPrimitiveProcessorJL::createBPP(ByteStream& bs) const
bs << bop;
bs << (uint8_t)(forHJ ? 1 : 0);
if (sendRowGroups)
{
bs << valueColumn;
bs << inputRG;
}
if (ot == ROW_GROUP)
{
bs << projectionRG;
@ -1248,6 +1238,7 @@ void BatchPrimitiveProcessorJL::createBPP(ByteStream& bs) const
* (projection count)x run msgs for projection Commands
*/
// The deser counterpart function is BPP::resetBPP
void BatchPrimitiveProcessorJL::runBPP(ByteStream& bs, uint32_t pmNum, bool isExeMgrDEC)
{
ISMPacketHeader ism;
@ -1289,35 +1280,28 @@ void BatchPrimitiveProcessorJL::runBPP(ByteStream& bs, uint32_t pmNum, bool isEx
bs << sentByEM;
if (_hasScan)
{
idbassert(ridCount == 0);
else if (!sendRowGroups)
}
else
{
idbassert(ridCount > 0 && (ridMap != 0 || sendAbsRids));
else
idbassert(inputRG.getRowCount() > 0);
if (sendRowGroups)
{
uint32_t rgSize = inputRG.getDataSize();
bs << rgSize;
bs.append(inputRG.getData(), rgSize);
}
bs << ridCount;
if (sendAbsRids)
bs.append((uint8_t*)absRids.get(), ridCount << 3);
else
{
bs << ridCount;
if (sendAbsRids)
bs.append((uint8_t*)absRids.get(), ridCount << 3);
else
{
bs << ridMap;
bs << baseRid;
bs.append((uint8_t*)relRids, ridCount << 1);
}
if (sendValues)
bs.append((uint8_t*)values, ridCount << 3);
bs << ridMap;
bs << baseRid;
bs.append((uint8_t*)relRids, ridCount << 1);
}
if (sendValues)
bs.append((uint8_t*)values, ridCount << 3);
for (i = 0; i < filterCount; i++)
filterSteps[i]->runCommand(bs);
@ -1667,7 +1651,6 @@ void BatchPrimitiveProcessorJL::setJoinedRowGroup(const rowgroup::RowGroup& rg)
void BatchPrimitiveProcessorJL::setInputRowGroup(const rowgroup::RowGroup& rg)
{
sendRowGroups = true;
sendAbsRids = false;
sendValues = false;
inputRG = rg;

View File

@ -343,7 +343,6 @@ class BatchPrimitiveProcessorJL
/* for RowGroup return type */
rowgroup::RowGroup inputRG, projectionRG;
bool sendRowGroups;
uint32_t valueColumn;
/* for PM Aggregation */

View File

@ -20,6 +20,7 @@
*
******************************************************************************/
#include <iostream>
#include "bytestream.h"
#include "primitivemsg.h"
#include "blocksize.h"
#include "lbidlist.h"
@ -700,7 +701,7 @@ bool LBIDList::CasualPartitionPredicate(const BRM::EMCasualPartition_t& cpRange,
const execplan::CalpontSystemCatalog::ColType& ct, const uint8_t BOP,
bool isDict)
{
int length = bs->length(), pos = 0;
messageqcpp::BSSizeType length = bs->length(), pos = 0;
const char* MsgDataPtr = (const char*)bs->buf();
bool scan = true;
int64_t value = 0;

View File

@ -282,6 +282,8 @@ struct ISMPacketHeader
uint32_t Interleave;
uint16_t Flags;
uint8_t Command;
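// !!! This attribute is used to store a sum whose operand type is potentially uint64_t.
// As of 23.02.10 uint32_t here is always enough for the purpose of this attribute though.
// NOTE(review): the field below is declared uint16_t, not uint32_t — confirm which width is intended.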
uint16_t Size;
unsigned Type : 4;
unsigned MsgCount : 4;

View File

@ -368,6 +368,7 @@ bool ResourceManager::getMemory(int64_t amount, boost::shared_ptr<int64_t>& sess
return (ret1 && ret2);
}
// Don't care about session memory
// The amount type is unsafe if amount is close to max<int64_t>, which is unrealistic in 2024.
bool ResourceManager::getMemory(int64_t amount, bool patience)
{
bool ret1 = (atomicops::atomicSub(&totalUmMemLimit, amount) >= 0);

View File

@ -19,6 +19,7 @@
* $Id: rowestimator.cpp 5642 2009-08-10 21:04:59Z wweeks $
*
******************************************************************************/
#include <cassert>
#include <iostream>
#include "primitivemsg.h"
#include "blocksize.h"
@ -292,7 +293,7 @@ float RowEstimator::estimateRowReturnFactor(const BRM::EMEntry& emEntry, const m
// For example, there are two operations for "col1 > 5 and col1 < 10":
// 1) col1 > 5
// 2) col2 < 10
int length = bs->length(), pos = 0;
messageqcpp::BSSizeType length = bs->length(), pos = 0;
const char* msgDataPtr = (const char*)bs->buf();
int64_t value = 0;
int128_t bigValue = 0;
@ -301,6 +302,7 @@ float RowEstimator::estimateRowReturnFactor(const BRM::EMEntry& emEntry, const m
for (int i = 0; i < comparisonLimit; i++)
{
assert(ct.colWidth >= 0);
pos += ct.colWidth + 2; // predicate + op + lcf
// TODO: Stole this condition from lbidlist.

View File

@ -417,6 +417,7 @@ void TupleHashJoinStep::smallRunnerFcn(uint32_t index, uint threadID, uint64_t*
smallRG.initRow(&r);
try
{
// Very unfortunate choice for the type b/c of RM::getMemory type.
ssize_t rgSize;
bool gotMem;
goto next;

View File

@ -4300,7 +4300,7 @@ ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& non
if (from_tzinfo)
{
serializeTimezoneInfo(bs, from_tzinfo);
uint32_t length = bs.length();
messageqcpp::BSSizeType length = bs.length();
uint8_t* buf = new uint8_t[length];
bs >> buf;
tzinfo = string((char*)buf, length);
@ -4312,7 +4312,7 @@ ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& non
if (to_tzinfo)
{
serializeTimezoneInfo(bs, to_tzinfo);
uint32_t length = bs.length();
messageqcpp::BSSizeType length = bs.length();
uint8_t* buf = new uint8_t[length];
bs >> buf;
tzinfo = string((char*)buf, length);