You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
1. Input and output RowGroups used in the GROUP_CONCAT classes currently allocate a raw memory buffer whose size equals the declared width of the string datatype. As an example, for the following query: SELECT col1, GROUP_CONCAT(col2) FROM t GROUP BY col1; If col2 is a TEXT field with default width, the input RowGroup containing the target rows to be concatenated will assign 64 KB of memory for every input row in the RowGroup. This is wasteful, as actual field values in real workloads would be much smaller. We fix this by enabling the RowGroup to use the StringStore when the RowGroup contains long strings. 2. RowAggregation::initialize() allocates a memory buffer for a NULL row. The size of this buffer is equal to the row size for the output RowGroup. For the above scenario, using the default group_concat_max_len (a server variable that sets the maximum length of the GROUP_CONCAT string) value of 1 MB, the buffer size would be (1 MB + 64 KB + some additional metadata). If the user sets group_concat_max_len to a higher value, say 3 GB, this buffer size would be ~3 GB. If the runtime then creates several instances of RowAggregation, total memory consumption by PrimProc could exceed the hardware memory limits, causing the OS OOM killer to kill the process. We fix this problem by again enabling the StringStore for the NULL row allocation. 3. In the plugin code in buildAggregateColumn(), there is an integer overflow when the server group_concat_max_len variable (which is a uint32_t) is set to a value > INT32_MAX (such as 3 GB) and is assigned to CalpontSystemCatalog::ColType::colWidth (which is an int32_t). As a short-term fix, we saturate the value assigned to colWidth at INT32_MAX. The proper fix would be to change CalpontSystemCatalog::ColType::colWidth to a uint32_t.
186 lines
4.7 KiB
C++
186 lines
4.7 KiB
C++
/* Copyright (C) 2014 InfiniDB, Inc.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
// $Id: groupconcat.h 9705 2013-07-17 20:06:07Z pleblanc $
|
|
|
|
/** @file */
|
|
|
|
#pragma once
|
|
|
|
#include <cstdint>
#include <memory>
#include <queue>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include <boost/scoped_array.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/shared_ptr.hpp>

#include "returnedcolumn.h"  // SRCP
#include "rowgroup.h"        // RowGroup
#include "rowaggregation.h"  // SP_GroupConcat
#include "limitedorderby.h"  // IdbOrderBy
|
|
|
|
#define EXPORT
|
|
|
|
namespace joblist
|
|
{
|
|
// Forward declarations. JobInfo and ResourceManager are only used by reference
// or pointer in this header; GroupConcator is defined further down in this file.
struct JobInfo;
class ResourceManager;
class GroupConcator;
|
|
|
|
class GroupConcatInfo
|
|
{
|
|
public:
|
|
GroupConcatInfo();
|
|
virtual ~GroupConcatInfo();
|
|
|
|
void prepGroupConcat(JobInfo&);
|
|
void mapColumns(const rowgroup::RowGroup&);
|
|
|
|
std::set<uint32_t>& columns()
|
|
{
|
|
return fColumns;
|
|
}
|
|
std::vector<rowgroup::SP_GroupConcat>& groupConcat()
|
|
{
|
|
return fGroupConcat;
|
|
}
|
|
|
|
const std::string toString() const;
|
|
|
|
protected:
|
|
uint32_t getColumnKey(const execplan::SRCP& srcp, JobInfo& jobInfo);
|
|
std::shared_ptr<int[]> makeMapping(const rowgroup::RowGroup&, const rowgroup::RowGroup&);
|
|
|
|
std::set<uint32_t> fColumns;
|
|
std::vector<rowgroup::SP_GroupConcat> fGroupConcat;
|
|
};
|
|
|
|
class GroupConcatAgUM : public rowgroup::GroupConcatAg
|
|
{
|
|
public:
|
|
EXPORT GroupConcatAgUM(rowgroup::SP_GroupConcat&);
|
|
EXPORT ~GroupConcatAgUM();
|
|
|
|
using rowgroup::GroupConcatAg::merge;
|
|
void initialize();
|
|
void processRow(const rowgroup::Row&);
|
|
EXPORT void merge(const rowgroup::Row&, int64_t);
|
|
boost::scoped_ptr<GroupConcator>& concator()
|
|
{
|
|
return fConcator;
|
|
}
|
|
|
|
EXPORT uint8_t* getResult();
|
|
|
|
protected:
|
|
void applyMapping(const std::shared_ptr<int[]>&, const rowgroup::Row&);
|
|
|
|
boost::scoped_ptr<GroupConcator> fConcator;
|
|
boost::scoped_array<uint8_t> fData;
|
|
rowgroup::Row fRow;
|
|
rowgroup::RGData fRowRGData;
|
|
rowgroup::RowGroup fRowGroup;
|
|
bool fNoOrder;
|
|
};
|
|
|
|
// GROUP_CONCAT base
|
|
class GroupConcator
|
|
{
|
|
public:
|
|
GroupConcator();
|
|
virtual ~GroupConcator();
|
|
|
|
virtual void initialize(const rowgroup::SP_GroupConcat&);
|
|
virtual void processRow(const rowgroup::Row&) = 0;
|
|
|
|
virtual void merge(GroupConcator*) = 0;
|
|
virtual uint8_t* getResultImpl(const std::string& sep) = 0;
|
|
virtual uint8_t* getResult(const std::string& sep);
|
|
uint8_t* swapStreamWithStringAndReturnBuf(ostringstream& oss, bool isNull);
|
|
|
|
virtual const std::string toString() const;
|
|
|
|
protected:
|
|
virtual bool concatColIsNull(const rowgroup::Row&);
|
|
virtual void outputRow(std::ostringstream&, const rowgroup::Row&);
|
|
virtual int64_t lengthEstimate(const rowgroup::Row&);
|
|
|
|
std::vector<uint32_t> fConcatColumns;
|
|
std::vector<std::pair<utils::NullString, uint32_t> > fConstCols;
|
|
int64_t fCurrentLength;
|
|
int64_t fGroupConcatLen;
|
|
int64_t fConstantLen;
|
|
std::unique_ptr<std::string> outputBuf_;
|
|
long fTimeZone;
|
|
};
|
|
|
|
// For GROUP_CONCAT without distinct or orderby
|
|
class GroupConcatNoOrder : public GroupConcator
|
|
{
|
|
public:
|
|
GroupConcatNoOrder();
|
|
virtual ~GroupConcatNoOrder();
|
|
|
|
void initialize(const rowgroup::SP_GroupConcat&);
|
|
void processRow(const rowgroup::Row&);
|
|
|
|
void merge(GroupConcator*);
|
|
using GroupConcator::getResult;
|
|
uint8_t* getResultImpl(const std::string& sep);
|
|
//uint8_t* getResult(const std::string& sep);
|
|
|
|
const std::string toString() const;
|
|
|
|
protected:
|
|
rowgroup::RowGroup fRowGroup;
|
|
rowgroup::Row fRow;
|
|
rowgroup::RGData fData;
|
|
std::queue<rowgroup::RGData> fDataQueue;
|
|
uint64_t fRowsPerRG;
|
|
uint64_t fErrorCode;
|
|
uint64_t fMemSize;
|
|
ResourceManager* fRm;
|
|
boost::shared_ptr<int64_t> fSessionMemLimit;
|
|
};
|
|
|
|
// ORDER BY used in GROUP_CONCAT class
|
|
// This version is for GROUP_CONCAT, the size is limited by the group_concat_max_len.
|
|
class GroupConcatOrderBy : public GroupConcator, public ordering::IdbOrderBy
|
|
{
|
|
public:
|
|
GroupConcatOrderBy();
|
|
virtual ~GroupConcatOrderBy();
|
|
|
|
using ordering::IdbOrderBy::initialize;
|
|
void initialize(const rowgroup::SP_GroupConcat&);
|
|
void processRow(const rowgroup::Row&);
|
|
uint64_t getKeyLength() const;
|
|
|
|
void merge(GroupConcator*);
|
|
using GroupConcator::getResult;
|
|
uint8_t* getResultImpl(const std::string& sep);
|
|
//uint8_t* getResult(const std::string& sep);
|
|
|
|
const std::string toString() const;
|
|
|
|
protected:
|
|
};
|
|
|
|
} // namespace joblist
|
|
|
|
#undef EXPORT
|