1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-07 03:22:57 +03:00

fix(PrimProc): MCOL-5852 use only long string storage

Use only long-string storage for the group_concat data to reduce memory usage.
This commit is contained in:
Aleksei Antipovskii
2025-01-17 13:02:28 +01:00
committed by Alexey Antipovsky
parent ea6d2d873e
commit 87d47fd7ae
3 changed files with 27 additions and 6 deletions

View File

@@ -332,6 +332,7 @@ void GroupConcatAgUM::initialize()
{ {
fRowGroup = fGroupConcat->fRowGroup; fRowGroup = fGroupConcat->fRowGroup;
fRowGroup.setUseStringTable(true); fRowGroup.setUseStringTable(true);
fRowGroup.setUseOnlyLongString(true);
fRowRGData.reinit(fRowGroup, 1); fRowRGData.reinit(fRowGroup, 1);
fRowGroup.setData(&fRowRGData); fRowGroup.setData(&fRowRGData);
fRowGroup.resetRowGroup(0); fRowGroup.resetRowGroup(0);
@@ -998,6 +999,7 @@ void GroupConcatNoOrder::initialize(const rowgroup::SP_GroupConcat& gcc)
GroupConcator::initialize(gcc); GroupConcator::initialize(gcc);
fRowGroup = gcc->fRowGroup; fRowGroup = gcc->fRowGroup;
fRowGroup.setUseOnlyLongString(true);
fRowsPerRG = 128; fRowsPerRG = 128;
fErrorCode = ERR_AGGREGATION_TOO_BIG; fErrorCode = ERR_AGGREGATION_TOO_BIG;
fRm = gcc->fRm; fRm = gcc->fRm;
@@ -1010,7 +1012,7 @@ void GroupConcatNoOrder::initialize(const rowgroup::SP_GroupConcat& gcc)
uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize(); uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize();
if (!fRm->getMemory(newSize, fSessionMemLimit)) if (fRm && !fRm->getMemory(newSize, fSessionMemLimit))
{ {
cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @" << __FILE__ << ":" << __LINE__; cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @" << __FILE__ << ":" << __LINE__;
throw IDBExcept(fErrorCode); throw IDBExcept(fErrorCode);
@@ -1020,6 +1022,7 @@ void GroupConcatNoOrder::initialize(const rowgroup::SP_GroupConcat& gcc)
fData.reinit(fRowGroup, fRowsPerRG); fData.reinit(fRowGroup, fRowsPerRG);
fRowGroup.setData(&fData); fRowGroup.setData(&fData);
fRowGroup.setUseOnlyLongString(true);
fRowGroup.resetRowGroup(0); fRowGroup.resetRowGroup(0);
fRowGroup.initRow(&fRow); fRowGroup.initRow(&fRow);
fRowGroup.getRow(0, &fRow); fRowGroup.getRow(0, &fRow);

View File

@@ -79,7 +79,7 @@ uint64_t StringStore::storeString(const uint8_t* data, uint32_t len)
if (mem.size() > 0) if (mem.size() > 0)
lastMC = (MemChunk*)mem.back().get(); lastMC = (MemChunk*)mem.back().get();
if ((len + 4) >= CHUNK_SIZE) if ((len + 4) >= CHUNK_SIZE || fUseOnlyLongStrings)
{ {
auto allocSize = len + sizeof(MemChunk) + 4; auto allocSize = len + sizeof(MemChunk) + 4;
if (alloc) if (alloc)
@@ -316,8 +316,10 @@ RGData::RGData(const RowGroup& rg, uint32_t rowCount)
RGDataSizeType s = rg.getDataSize(rowCount); RGDataSizeType s = rg.getDataSize(rowCount);
rowData.reset(new uint8_t[s]); rowData.reset(new uint8_t[s]);
if (rg.usesStringTable() && rowCount > 0) if (rg.usesStringTable() && rowCount > 0) {
strings.reset(new StringStore()); strings.reset(new StringStore());
strings->useOnlyLongStrings(rg.usesOnlyLongString());
}
userDataStore.reset(); userDataStore.reset();
columnCount = rg.getColumnCount(); columnCount = rg.getColumnCount();
@@ -329,7 +331,10 @@ RGData::RGData(const RowGroup& rg)
rowData.reset(new uint8_t[rg.getMaxDataSize()]); rowData.reset(new uint8_t[rg.getMaxDataSize()]);
if (rg.usesStringTable()) if (rg.usesStringTable())
{
strings.reset(new StringStore()); strings.reset(new StringStore());
strings->useOnlyLongStrings(rg.usesOnlyLongString());
}
userDataStore.reset(); userDataStore.reset();
columnCount = rg.getColumnCount(); columnCount = rg.getColumnCount();
@@ -345,6 +350,7 @@ RGData::RGData(const RowGroup& rg, allocators::CountingAllocator<RGDataBufType>&
{ {
allocators::CountingAllocator<StringStoreBufType> ssAlloc = _alloc; allocators::CountingAllocator<StringStoreBufType> ssAlloc = _alloc;
strings.reset(new StringStore(ssAlloc)); strings.reset(new StringStore(ssAlloc));
strings->useOnlyLongStrings(rg.usesOnlyLongString());
} }
userDataStore.reset(); userDataStore.reset();
@@ -365,7 +371,7 @@ void RGData::reinit(const RowGroup& rg, uint32_t rowCount)
userDataStore.reset(); userDataStore.reset();
if (rg.usesStringTable()) if (rg.usesStringTable())
{ {
if (alloc) if (alloc)
{ {
@@ -386,7 +392,7 @@ if (rg.usesStringTable())
void RGData::reinit(const RowGroup& rg) void RGData::reinit(const RowGroup& rg)
{ {
reinit(rg, 8192); reinit(rg, rgCommonSize);
} }
void RGData::serialize(ByteStream& bs, RGDataSizeType amount) const void RGData::serialize(ByteStream& bs, RGDataSizeType amount) const
@@ -1128,6 +1134,7 @@ RowGroup::RowGroup(const RowGroup& r)
, rgData(r.rgData) , rgData(r.rgData)
, strings(r.strings) , strings(r.strings)
, useStringTable(r.useStringTable) , useStringTable(r.useStringTable)
, useOnlyLongStrings(r.useOnlyLongStrings)
, hasCollation(r.hasCollation) , hasCollation(r.hasCollation)
, hasLongStringField(r.hasLongStringField) , hasLongStringField(r.hasLongStringField)
, sTableThreshold(r.sTableThreshold) , sTableThreshold(r.sTableThreshold)
@@ -1160,6 +1167,7 @@ RowGroup& RowGroup::operator=(const RowGroup& r)
rgData = r.rgData; rgData = r.rgData;
strings = r.strings; strings = r.strings;
useStringTable = r.useStringTable; useStringTable = r.useStringTable;
useOnlyLongStrings = r.useOnlyLongStrings;
hasCollation = r.hasCollation; hasCollation = r.hasCollation;
hasLongStringField = r.hasLongStringField; hasLongStringField = r.hasLongStringField;
sTableThreshold = r.sTableThreshold; sTableThreshold = r.sTableThreshold;
@@ -1212,6 +1220,7 @@ void RowGroup::serialize(ByteStream& bs) const
bs << (uint8_t)hasLongStringField; bs << (uint8_t)hasLongStringField;
bs << sTableThreshold; bs << sTableThreshold;
bs.append((uint8_t*)&forceInline[0], sizeof(bool) * columnCount); bs.append((uint8_t*)&forceInline[0], sizeof(bool) * columnCount);
bs << (uint8_t)useOnlyLongStrings;
} }
void RowGroup::deserialize(ByteStream& bs) void RowGroup::deserialize(ByteStream& bs)
@@ -1238,6 +1247,8 @@ void RowGroup::deserialize(ByteStream& bs)
forceInline.reset(new bool[columnCount]); forceInline.reset(new bool[columnCount]);
memcpy(&forceInline[0], bs.buf(), sizeof(bool) * columnCount); memcpy(&forceInline[0], bs.buf(), sizeof(bool) * columnCount);
bs.advance(sizeof(bool) * columnCount); bs.advance(sizeof(bool) * columnCount);
bs >> tmp8;
useOnlyLongStrings = (bool)tmp8;
// offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]); // offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]);
offsets = 0; offsets = 0;

View File

@@ -63,7 +63,7 @@
namespace rowgroup namespace rowgroup
{ {
const int16_t rgCommonSize = 8192; constexpr int16_t rgCommonSize = 8192;
using RGDataSizeType = uint64_t; using RGDataSizeType = uint64_t;
/* /*
@@ -172,6 +172,8 @@ class StringStore
{ {
return fUseStoreStringMutex; return fUseStoreStringMutex;
} }
// Sets the flag that storeString() ORs into its size check
// (`(len + 4) >= CHUNK_SIZE || fUseOnlyLongStrings`): when b is true, every
// string takes the branch otherwise reserved for strings that reach
// CHUNK_SIZE, whatever its length. The flag is initialised to false.
void useOnlyLongStrings(bool b) { fUseOnlyLongStrings = b; }
// Returns the current value of fUseOnlyLongStrings, i.e. whether
// storeString() is being forced down its large-string branch for all strings.
bool useOnlyLongStrings() const { return fUseOnlyLongStrings; }
// This is an overlay b/c the underlying data needs to be any size, // This is an overlay b/c the underlying data needs to be any size,
// and alloc'd in one chunk. data can't be a separate dynamic chunk. // and alloc'd in one chunk. data can't be a separate dynamic chunk.
@@ -193,6 +195,7 @@ class StringStore
std::vector<boost::shared_ptr<uint8_t[]>> longStrings; std::vector<boost::shared_ptr<uint8_t[]>> longStrings;
bool empty = true; bool empty = true;
bool fUseStoreStringMutex = false; //@bug6065, make StringStore::storeString() thread safe bool fUseStoreStringMutex = false; //@bug6065, make StringStore::storeString() thread safe
bool fUseOnlyLongStrings = false;
boost::mutex fMutex; boost::mutex fMutex;
std::optional<allocators::CountingAllocator<StringStoreBufType>> alloc {}; std::optional<allocators::CountingAllocator<StringStoreBufType>> alloc {};
}; };
@@ -1556,6 +1559,8 @@ class RowGroup : public messageqcpp::Serializeable
inline bool usesStringTable() const; inline bool usesStringTable() const;
inline void setUseStringTable(bool); inline void setUseStringTable(bool);
// Sets the RowGroup-level "only long strings" flag. The RGData constructors
// forward this value to the StringStore they create
// (strings->useOnlyLongStrings(rg.usesOnlyLongString())), and the flag is
// carried through the copy constructor, operator=, serialize() and
// deserialize().
// NOTE(review): callers in this change set the flag before RGData::reinit();
// presumably reinit() forwards it as well -- confirm against its full body.
void setUseOnlyLongString(bool b) { useOnlyLongStrings = b; }
// Returns the RowGroup-level "only long strings" flag (false by default).
// The RGData constructors read it to configure the StringStore they create.
bool usesOnlyLongString() const { return useOnlyLongStrings ; }
bool hasLongString() const bool hasLongString() const
{ {
@@ -1628,6 +1633,8 @@ class RowGroup : public messageqcpp::Serializeable
RGData* rgData = nullptr; RGData* rgData = nullptr;
StringStore* strings = nullptr; // note, strings and data belong to rgData StringStore* strings = nullptr; // note, strings and data belong to rgData
bool useStringTable = true; bool useStringTable = true;
bool useOnlyLongStrings = false;
bool useAggregateDataStore = true;
bool hasCollation = false; bool hasCollation = false;
bool hasLongStringField = false; bool hasLongStringField = false;
uint32_t sTableThreshold = 20; uint32_t sTableThreshold = 20;