1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-01 06:46:55 +03:00

fix(PrimProc): MCOL-5852 use only long string storage

for the group_concat data to reduce memory usage
This commit is contained in:
Aleksei Antipovskii
2025-01-17 13:02:28 +01:00
committed by Alexey Antipovsky
parent ea6d2d873e
commit 87d47fd7ae
3 changed files with 27 additions and 6 deletions

View File

@ -79,7 +79,7 @@ uint64_t StringStore::storeString(const uint8_t* data, uint32_t len)
if (mem.size() > 0)
lastMC = (MemChunk*)mem.back().get();
if ((len + 4) >= CHUNK_SIZE)
if ((len + 4) >= CHUNK_SIZE || fUseOnlyLongStrings)
{
auto allocSize = len + sizeof(MemChunk) + 4;
if (alloc)
@ -316,8 +316,10 @@ RGData::RGData(const RowGroup& rg, uint32_t rowCount)
RGDataSizeType s = rg.getDataSize(rowCount);
rowData.reset(new uint8_t[s]);
if (rg.usesStringTable() && rowCount > 0)
if (rg.usesStringTable() && rowCount > 0) {
strings.reset(new StringStore());
strings->useOnlyLongStrings(rg.usesOnlyLongString());
}
userDataStore.reset();
columnCount = rg.getColumnCount();
@ -329,7 +331,10 @@ RGData::RGData(const RowGroup& rg)
rowData.reset(new uint8_t[rg.getMaxDataSize()]);
if (rg.usesStringTable())
{
strings.reset(new StringStore());
strings->useOnlyLongStrings(rg.usesOnlyLongString());
}
userDataStore.reset();
columnCount = rg.getColumnCount();
@ -345,6 +350,7 @@ RGData::RGData(const RowGroup& rg, allocators::CountingAllocator<RGDataBufType>&
{
allocators::CountingAllocator<StringStoreBufType> ssAlloc = _alloc;
strings.reset(new StringStore(ssAlloc));
strings->useOnlyLongStrings(rg.usesOnlyLongString());
}
userDataStore.reset();
@ -365,7 +371,7 @@ void RGData::reinit(const RowGroup& rg, uint32_t rowCount)
userDataStore.reset();
if (rg.usesStringTable())
if (rg.usesStringTable())
{
if (alloc)
{
@ -386,7 +392,7 @@ if (rg.usesStringTable())
void RGData::reinit(const RowGroup& rg)
{
reinit(rg, 8192);
reinit(rg, rgCommonSize);
}
void RGData::serialize(ByteStream& bs, RGDataSizeType amount) const
@ -1128,6 +1134,7 @@ RowGroup::RowGroup(const RowGroup& r)
, rgData(r.rgData)
, strings(r.strings)
, useStringTable(r.useStringTable)
, useOnlyLongStrings(r.useOnlyLongStrings)
, hasCollation(r.hasCollation)
, hasLongStringField(r.hasLongStringField)
, sTableThreshold(r.sTableThreshold)
@ -1160,6 +1167,7 @@ RowGroup& RowGroup::operator=(const RowGroup& r)
rgData = r.rgData;
strings = r.strings;
useStringTable = r.useStringTable;
useOnlyLongStrings = r.useOnlyLongStrings;
hasCollation = r.hasCollation;
hasLongStringField = r.hasLongStringField;
sTableThreshold = r.sTableThreshold;
@ -1212,6 +1220,7 @@ void RowGroup::serialize(ByteStream& bs) const
bs << (uint8_t)hasLongStringField;
bs << sTableThreshold;
bs.append((uint8_t*)&forceInline[0], sizeof(bool) * columnCount);
bs << (uint8_t)useOnlyLongStrings;
}
void RowGroup::deserialize(ByteStream& bs)
@ -1238,6 +1247,8 @@ void RowGroup::deserialize(ByteStream& bs)
forceInline.reset(new bool[columnCount]);
memcpy(&forceInline[0], bs.buf(), sizeof(bool) * columnCount);
bs.advance(sizeof(bool) * columnCount);
bs >> tmp8;
useOnlyLongStrings = (bool)tmp8;
// offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]);
offsets = 0;