1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-5429 Fix high memory consumption in GROUP_CONCAT() processing.

1. Input and output RowGroups used in GROUP_CONCAT classes
currently allocate a raw memory buffer of size equal
to the declared width of the string datatype. As an example,
for the following query:
  SELECT col1, GROUP_CONCAT(col2) FROM t GROUP BY col1;
If col2 is a TEXT field with default width, the input
RowGroup containing the target rows to be concatenated will
allocate 64 KB of memory for every input row in the RowGroup.
This is wasteful as actual field values in real workloads
would be much smaller. We fix this by enabling the
RowGroup to use the StringStore when the RowGroup contains
long strings.

2. RowAggregation::initialize() allocates a memory buffer
for a NULL row. The size of this buffer is equal to the
row size for the output RowGroup. For the above scenario,
using the default value of 1 MB for group_concat_max_len (a
server variable that sets the maximum length of the
GROUP_CONCAT string), the buffer size would be
(1 MB + 64 KB + some additional metadata). If the user sets
group_concat_max_len to a higher value, say 3 GB, this buffer
size would be ~3 GB. If the runtime then creates several
instances of RowAggregation, the total memory consumption by
PrimProc could exceed the hardware memory limits, causing the
OS OOM killer to kill the process. We fix this problem by again
enabling the StringStore for the NULL row allocation.

3. In the plugin code in buildAggregateColumn(), there is
an integer overflow when the server group_concat_max_len
variable (which is a uint32_t) is set to a value > INT32_MAX
(such as 3 GB) and is assigned to
CalpontSystemCatalog::ColType::colWidth (which is an int32_t).
As a short-term fix, we saturate the value assigned to colWidth
at INT32_MAX. A proper fix would be to change the type of
CalpontSystemCatalog::ColType::colWidth to uint32_t.
This commit is contained in:
Gagan Goel
2023-04-19 21:11:06 +00:00
parent 4fe9cd64a3
commit 0be1c3dc8f
6 changed files with 78 additions and 14 deletions

View File

@ -656,7 +656,7 @@ void RowAggregation::resetUDAF(RowUDAFFunctionCol* rowUDAF, uint64_t funcColsIdx
// Initialize the data members to meaningful values and set up the hashmap.
// The fRowGroupOut must have a valid data pointer before this.
//------------------------------------------------------------------------------
void RowAggregation::initialize()
void RowAggregation::initialize(bool hasGroupConcat)
{
// Calculate the length of the hashmap key.
fAggMapKeyCount = fGroupByCols.size();
@ -694,9 +694,25 @@ void RowAggregation::initialize()
makeAggFieldsNull(fRow);
// Keep a copy of the null row to initialize new map entries.
fRowGroupOut->initRow(&fNullRow, true);
fNullRowData.reset(new uint8_t[fNullRow.getSize()]);
fNullRow.setData(rowgroup::Row::Pointer(fNullRowData.get()));
// MCOL-5429 Use stringstore if the datatype of the groupconcat
// field is a long string.
if (hasGroupConcat && fRowGroupOut->hasLongString())
{
fNullRowGroup = *fRowGroupOut;
fNullRowGroup.setUseStringTable(true);
fNullRowRGData.reinit(fNullRowGroup, 1);
fNullRowGroup.setData(&fNullRowRGData);
fNullRowGroup.resetRowGroup(0);
fNullRowGroup.initRow(&fNullRow);
fNullRowGroup.getRow(0, &fNullRow);
}
else
{
fRowGroupOut->initRow(&fNullRow, true);
fNullRowData.reset(new uint8_t[fNullRow.getSize()]);
fNullRow.setData(rowgroup::Row::Pointer(fNullRowData.get()));
}
copyRow(fRow, &fNullRow);
// Lazy approach w/o a mapping b/w fFunctionCols idx and fRGContextColl idx
@ -2413,7 +2429,7 @@ void RowAggregationUM::endOfInput()
//------------------------------------------------------------------------------
// Initialize the Group Concat data
//------------------------------------------------------------------------------
void RowAggregationUM::initialize()
void RowAggregationUM::initialize(bool hasGroupConcat)
{
if (fGroupConcat.size() > 0)
fFunctionColGc = fFunctionCols;
@ -2423,7 +2439,7 @@ void RowAggregationUM::initialize()
fKeyRG = fRowGroupIn.truncate(fGroupByCols.size());
}
RowAggregation::initialize();
RowAggregation::initialize(fGroupConcat.size() > 0);
}
//------------------------------------------------------------------------------