
feat(PrimProc): MCOL-5852 disk-based GROUP_CONCAT & JSON_ARRAYAGG

* move GROUP_CONCAT/JSON_ARRAYAGG storage from the RowAggregation* classes
  to the RowGroup (see the sketch after this list)
* internal data structures (de)serialization
* get rid of the specialized classes for processing JSON_ARRAYAGG
* move the memory accounting to disk-based aggregation classes
* allow aggregation generations to be used for queries with
  GROUP_CONCAT/JSON_ARRAYAGG
* remove the thread id from the error message as it interferes with MTR
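
A minimal, hedged sketch of the storage model the first bullet describes. MiniConcatState
and MiniStore are hypothetical stand-ins for joblist::GroupConcatAg and
rowgroup::AggregateDataStore from the diff below: per-group aggregation state is appended
to a side store owned by the row data, and a row carries only the index returned by the
store, not the data itself.

// Hedged sketch, not the real ColumnStore classes.
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

struct MiniConcatState            // stand-in for joblist::GroupConcatAg
{
  std::string accumulated;        // concatenated values for one group
};

class MiniStore                   // stand-in for rowgroup::AggregateDataStore
{
 public:
  // Append one group's state; the returned index is what a row would carry.
  uint32_t store(std::shared_ptr<MiniConcatState> s)
  {
    fData.emplace_back(std::move(s));
    return static_cast<uint32_t>(fData.size() - 1);
  }
  std::shared_ptr<MiniConcatState> get(uint32_t pos) const
  {
    assert(pos < fData.size());
    return fData[pos];
  }

 private:
  std::vector<std::shared_ptr<MiniConcatState>> fData;
};

int main()
{
  MiniStore store;
  auto g = std::make_shared<MiniConcatState>();
  g->accumulated = "a,b,c";
  uint32_t idx = store.store(g);  // the row keeps idx instead of the string
  assert(store.get(idx)->accumulated == "a,b,c");
  return 0;
}
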
Aleksei Antipovskii
2025-02-19 12:32:51 +01:00
committed by Alexey Antipovsky
parent 87d47fd7ae
commit 4bea7e59a0
25 changed files with 1339 additions and 2056 deletions

@@ -29,6 +29,8 @@
// #define NDEBUG
#include <sstream>
#include <iterator>
#include "rowaggregation.h"
using namespace std;
#include <numeric>
@@ -43,6 +45,7 @@ using namespace execplan;
#include "rowgroup.h"
#include "dataconvert.h"
#include "columnwidth.h"
#include "groupconcat.h"
namespace rowgroup
{
@@ -305,6 +308,72 @@ void UserDataStore::deserialize(ByteStream& bs)
return;
}
void AggregateDataStore::serialize(messageqcpp::ByteStream& bs) const
{
uint64_t size = fGroupConcat.size();
bs << size;
for (const auto& gc : fGroupConcat)
{
gc->serialize(bs);
}
size = fData.size();
bs << size;
for (const auto& gca : fData)
{
bs << gca->getGroupConcatId();
gca->serialize(bs);
}
}
void AggregateDataStore::deserialize(messageqcpp::ByteStream& bs)
{
fGroupConcat.clear();
fData.clear();
uint64_t size;
bs >> size;
fGroupConcat.resize(size);
for (uint64_t i = 0; i < size; i++)
{
fGroupConcat[i].reset(new GroupConcat());
fGroupConcat[i]->deserialize(bs);
}
bs >> size;
fData.resize(size);
for (uint64_t i = 0; i < size; i++)
{
uint32_t gc_id;
bs >> gc_id;
idbassert(gc_id < fGroupConcat.size());
fData[i].reset(new joblist::GroupConcatAg(fGroupConcat[gc_id]));
fData[i]->deserialize(bs);
}
}
uint32_t AggregateDataStore::storeAggregateData(boost::shared_ptr<joblist::GroupConcatAg>& data)
{
fData.emplace_back(data);
return fData.size() - 1;
}
boost::shared_ptr<joblist::GroupConcatAg> AggregateDataStore::getAggregateData(uint32_t pos) const
{
idbassert(pos < fData.size());
return fData[pos];
}
RGDataSizeType AggregateDataStore::getDataSize() const
{
RGDataSizeType size = 0;
for (const auto& gc : fGroupConcat)
{
size += gc->getDataSize();
}
for (const auto& gca : fData)
{
size += gca->getDataSize();
}
return size;
}
RGData::RGData(allocators::CountingAllocator<RGDataBufType>& _alloc) : RGData()
{
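
A hedged sketch of the wire framing used by AggregateDataStore::serialize()/deserialize()
above: each container is prefixed with a 64-bit element count, and every fData entry also
carries the 32-bit id of the GroupConcat descriptor it is re-attached to on deserialization.
Buf is a hypothetical stand-in; the real code uses messageqcpp::ByteStream.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>

struct Buf  // hypothetical stand-in, not the real ByteStream API
{
  std::vector<uint8_t> bytes;
  size_t rpos = 0;
  template <typename T>
  void put(T v)
  {
    const auto* p = reinterpret_cast<const uint8_t*>(&v);
    bytes.insert(bytes.end(), p, p + sizeof(T));
  }
  template <typename T>
  T get()
  {
    T v;
    std::memcpy(&v, bytes.data() + rpos, sizeof(T));
    rpos += sizeof(T);
    return v;
  }
};

int main()
{
  // Pretend each entry is (descriptor id, payload the aggregator would write).
  std::vector<std::pair<uint32_t, uint64_t>> entries = {{0, 42}, {1, 7}};

  // serialize: element count first, then the id-tagged elements
  Buf bs;
  bs.put<uint64_t>(entries.size());
  for (const auto& e : entries)
  {
    bs.put<uint32_t>(e.first);
    bs.put<uint64_t>(e.second);
  }

  // deserialize: read the count back, then rebuild each entry in order
  Buf in = bs;
  std::vector<std::pair<uint32_t, uint64_t>> out;
  for (uint64_t i = 0, n = in.get<uint64_t>(); i < n; i++)
  {
    uint32_t id = in.get<uint32_t>();
    out.emplace_back(id, in.get<uint64_t>());
  }
  assert(out == entries);
  return 0;
}
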
@@ -316,29 +385,24 @@ RGData::RGData(const RowGroup& rg, uint32_t rowCount)
RGDataSizeType s = rg.getDataSize(rowCount);
rowData.reset(new uint8_t[s]);
if (rg.usesStringTable() && rowCount > 0) {
if (rg.usesStringTable() && rowCount > 0)
{
strings.reset(new StringStore());
strings->useOnlyLongStrings(rg.usesOnlyLongString());
}
if (rg.usesAggregateDataStore())
{
aggregateDataStore.reset(new AggregateDataStore(rg.getGroupConcats()));
}
userDataStore.reset();
columnCount = rg.getColumnCount();
rowSize = rg.getRowSize();
}
RGData::RGData(const RowGroup& rg)
RGData::RGData(const RowGroup& rg) : RGData(rg, rgCommonSize)
{
rowData.reset(new uint8_t[rg.getMaxDataSize()]);
if (rg.usesStringTable())
{
strings.reset(new StringStore());
strings->useOnlyLongStrings(rg.usesOnlyLongString());
}
userDataStore.reset();
columnCount = rg.getColumnCount();
rowSize = rg.getRowSize();
}
@@ -371,21 +435,28 @@ void RGData::reinit(const RowGroup& rg, uint32_t rowCount)
userDataStore.reset();
if (rg.usesStringTable())
if (rg.usesStringTable() || rg.usesOnlyLongString())
{
if (alloc)
{
allocators::CountingAllocator<StringStoreBufType> ssAlloc = alloc.value();
strings.reset(new StringStore(ssAlloc));
strings->useOnlyLongStrings(rg.usesOnlyLongString());
}
else
{
strings.reset(new StringStore());
}
}
else
strings.reset();
if (rg.usesAggregateDataStore())
{
aggregateDataStore.reset(new AggregateDataStore(rg.getGroupConcats()));
}
else
aggregateDataStore.reset();
columnCount = rg.getColumnCount();
rowSize = rg.getRowSize();
}
@@ -419,6 +490,14 @@ void RGData::serialize(ByteStream& bs, RGDataSizeType amount) const
}
else
bs << (uint8_t)0;
if (aggregateDataStore)
{
bs << (uint8_t)1;
aggregateDataStore->serialize(bs);
}
else
bs << (uint8_t)0;
}
void RGData::deserialize(ByteStream& bs, RGDataSizeType defAmount)
@@ -473,6 +552,15 @@ void RGData::deserialize(ByteStream& bs, RGDataSizeType defAmount)
}
else
userDataStore.reset();
bs >> tmp8;
if (tmp8)
{
aggregateDataStore.reset(new AggregateDataStore());
aggregateDataStore->deserialize(bs);
}
else
aggregateDataStore.reset();
}
return;
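
The optional RGData members (strings, userDataStore, and now aggregateDataStore) are all
framed the same way in the two hunks above: a one-byte presence flag, followed by the
member's payload only when the flag is set; the reader mirrors this and resets the member
otherwise. A generic, hedged sketch of that pattern, with a plain byte vector standing in
for the real stream:

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

using Payload = std::vector<uint8_t>;

void writeOptional(std::vector<uint8_t>& bs, const std::optional<Payload>& p)
{
  bs.push_back(p ? 1 : 0);                           // presence flag
  if (p)
  {
    bs.push_back(static_cast<uint8_t>(p->size()));   // tiny length prefix for the demo
    bs.insert(bs.end(), p->begin(), p->end());
  }
}

std::optional<Payload> readOptional(const std::vector<uint8_t>& bs, size_t& pos)
{
  if (bs[pos++] == 0)
    return std::nullopt;                             // flag == 0: member stays reset
  uint8_t len = bs[pos++];
  Payload p(bs.data() + pos, bs.data() + pos + len);
  pos += len;
  return p;
}

int main()
{
  std::vector<uint8_t> bs;
  writeOptional(bs, Payload{1, 2, 3});               // member present
  writeOptional(bs, std::nullopt);                   // member absent
  size_t pos = 0;
  assert(readOptional(bs, pos).value().size() == 3);
  assert(!readOptional(bs, pos).has_value());
  return 0;
}
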
@@ -1133,8 +1221,10 @@ RowGroup::RowGroup(const RowGroup& r)
, precision(r.precision)
, rgData(r.rgData)
, strings(r.strings)
, aggregateDataStore(r.aggregateDataStore)
, useStringTable(r.useStringTable)
, useOnlyLongStrings(r.useOnlyLongStrings)
, useAggregateDataStore(r.useAggregateDataStore)
, hasCollation(r.hasCollation)
, hasLongStringField(r.hasLongStringField)
, sTableThreshold(r.sTableThreshold)
@@ -1166,8 +1256,10 @@ RowGroup& RowGroup::operator=(const RowGroup& r)
precision = r.precision;
rgData = r.rgData;
strings = r.strings;
aggregateDataStore = r.aggregateDataStore;
useStringTable = r.useStringTable;
useOnlyLongStrings = r.useOnlyLongStrings;
useAggregateDataStore = r.useAggregateDataStore;
hasCollation = r.hasCollation;
hasLongStringField = r.hasLongStringField;
sTableThreshold = r.sTableThreshold;
@@ -1261,6 +1353,25 @@ void RowGroup::deserialize(ByteStream& bs)
charsets.insert(charsets.begin(), charsetNumbers.size(), nullptr);
}
void RowGroup::setUseAggregateDataStore(bool b, std::span<boost::shared_ptr<GroupConcat>> group_concats)
{
idbassert(!b || !group_concats.empty());
if (useAggregateDataStore && !b)
{
fGroupConcats.clear();
}
else if (b)
{
fGroupConcats.assign(group_concats.begin(), group_concats.end());
if (rgData)
{
rgData->aggregateDataStore.reset(new AggregateDataStore(fGroupConcats));
aggregateDataStore = rgData->aggregateDataStore.get();
}
}
useAggregateDataStore = b;
}
void RowGroup::serializeRGData(ByteStream& bs) const
{
rgData->serialize(bs, getDataSize());
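
A usage-level, hedged sketch of the setUseAggregateDataStore() toggle above, keeping only
its enable/disable semantics: per the idbassert, enabling requires at least one GROUP_CONCAT
descriptor; enabling copies the descriptors and (re)creates the side store on the attached
row data, while disabling clears the descriptors. Every name below is a hypothetical
stand-in for the corresponding RowGroup/RGData member.

#include <cassert>
#include <memory>
#include <span>
#include <vector>

struct Descriptor {};                       // stand-in for GroupConcat
struct SideStore                            // stand-in for AggregateDataStore
{
  explicit SideStore(std::vector<std::shared_ptr<Descriptor>> d) : descriptors(std::move(d)) {}
  std::vector<std::shared_ptr<Descriptor>> descriptors;
};
struct RowData                              // stand-in for RGData
{
  std::unique_ptr<SideStore> store;
};

struct Group                                // stand-in for RowGroup
{
  void setUseStore(bool b, std::span<std::shared_ptr<Descriptor>> descs)
  {
    assert(!b || !descs.empty());           // enabling needs descriptors
    if (use && !b)
      descriptors.clear();                  // turning the store off: drop descriptors
    else if (b)
    {
      descriptors.assign(descs.begin(), descs.end());
      if (data)                             // re-wire the already-attached row data
        data->store = std::make_unique<SideStore>(descriptors);
    }
    use = b;
  }

  bool use = false;
  std::vector<std::shared_ptr<Descriptor>> descriptors;
  RowData* data = nullptr;
};

int main()
{
  Group g;
  RowData rd;
  g.data = &rd;
  std::vector<std::shared_ptr<Descriptor>> d = {std::make_shared<Descriptor>()};
  g.setUseStore(true, d);
  assert(rd.store && rd.store->descriptors.size() == 1);
  g.setUseStore(false, {});
  assert(g.descriptors.empty() && !g.use);
  return 0;
}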