1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

feat(PrimProc): MCOL-5852 disk-based GROUP_CONCAT & JSON_ARRAYAGG

* move GROUP_CONCAT/JSON_ARRAYAGG storage to the RowGroup from
  the RowAggregation*
* internal data structures (de)serialization
* get rid of the specialized classes for processing JSON_ARRAYAGG
* move the memory accounting to disk-based aggregation classes
* allow aggregation generations to be used for queries with
  GROUP_CONCAT/JSON_ARRAYAGG
* Remove the thread id from the error message as it interferes with the mtr
This commit is contained in:
Aleksei Antipovskii
2025-02-19 12:32:51 +01:00
committed by Alexey Antipovsky
parent 87d47fd7ae
commit 4bea7e59a0
25 changed files with 1339 additions and 2056 deletions

View File

@ -22,7 +22,6 @@ set(execplan_LIB_SRCS
functioncolumn.cpp
groupconcatcolumn.cpp
intervalcolumn.cpp
jsonarrayaggcolumn.cpp
logicoperator.cpp
mysqlexecutionplan.cpp
objectidmanager.cpp

View File

@ -41,11 +41,12 @@ namespace execplan
/**
* Constructors/Destructors
*/
GroupConcatColumn::GroupConcatColumn() : AggregateColumn()
GroupConcatColumn::GroupConcatColumn(bool isJsonArrayAgg) : AggregateColumn(), fIsJsonArrayAgg(isJsonArrayAgg)
{
}
GroupConcatColumn::GroupConcatColumn(const uint32_t sessionID) : AggregateColumn(sessionID)
GroupConcatColumn::GroupConcatColumn(const uint32_t sessionID, bool isJsonArrayAgg)
: AggregateColumn(sessionID), fIsJsonArrayAgg(isJsonArrayAgg)
{
}
@ -53,10 +54,7 @@ GroupConcatColumn::GroupConcatColumn(const GroupConcatColumn& rhs, const uint32_
: AggregateColumn(dynamic_cast<const AggregateColumn&>(rhs))
, fOrderCols(rhs.fOrderCols)
, fSeparator(rhs.fSeparator)
{
}
GroupConcatColumn::~GroupConcatColumn()
, fIsJsonArrayAgg(rhs.fIsJsonArrayAgg)
{
}
@ -67,16 +65,26 @@ GroupConcatColumn::~GroupConcatColumn()
const string GroupConcatColumn::toString() const
{
ostringstream output;
output << "GroupConcatColumn " << data() << endl;
output << AggregateColumn::toString() << endl;
output << "Group Concat Order Columns: " << endl;
if (fIsJsonArrayAgg)
{
output << "JsonArrayAggColumn " << data() << endl;
output << AggregateColumn::toString() << endl;
output << "Json Array Order Columns: " << endl;
}
else
{
output << "GroupConcatColumn " << data() << endl;
output << AggregateColumn::toString() << endl;
output << "Group Concat Order Columns: " << endl;
}
for (uint32_t i = 0; i < fOrderCols.size(); i++)
{
output << *fOrderCols[i];
}
output << "\nSeparator: " << fSeparator << endl;
if (!fIsJsonArrayAgg)
output << "\nSeparator: " << fSeparator << endl;
return output.str();
}
@ -84,7 +92,7 @@ string GroupConcatColumn::toCppCode(IncludeSet& includes) const
{
includes.insert("groupconcatcolumn.h");
stringstream ss;
ss << "GroupConcatColumn(" << sessionID() << ")";
ss << "GroupConcatColumn(" << sessionID() << "," << std::boolalpha << fIsJsonArrayAgg << ")";
return ss.str();
}
@ -100,13 +108,13 @@ void GroupConcatColumn::serialize(messageqcpp::ByteStream& b) const
b << (uint8_t)ObjectReader::GROUPCONCATCOLUMN;
AggregateColumn::serialize(b);
CalpontSelectExecutionPlan::ReturnedColumnList::const_iterator rcit;
b << static_cast<uint32_t>(fOrderCols.size());
for (rcit = fOrderCols.begin(); rcit != fOrderCols.end(); ++rcit)
(*rcit)->serialize(b);
for (const auto& col : fOrderCols)
col->serialize(b);
b << fSeparator;
b << (uint8_t)fIsJsonArrayAgg;
}
void GroupConcatColumn::unserialize(messageqcpp::ByteStream& b)
@ -127,6 +135,9 @@ void GroupConcatColumn::unserialize(messageqcpp::ByteStream& b)
}
b >> fSeparator;
uint8_t tmp8;
b >> tmp8;
fIsJsonArrayAgg = tmp8;
}
bool GroupConcatColumn::operator==(const GroupConcatColumn& t) const
@ -156,6 +167,9 @@ bool GroupConcatColumn::operator==(const GroupConcatColumn& t) const
if (fSeparator != t.fSeparator)
return false;
if (fIsJsonArrayAgg != t.fIsJsonArrayAgg)
return false;
return true;
}

View File

@ -50,16 +50,16 @@ class GroupConcatColumn : public AggregateColumn
/**
* Constructors
*/
GroupConcatColumn();
explicit GroupConcatColumn(bool isJsonArrayAgg = false);
explicit GroupConcatColumn(const uint32_t sessionID);
explicit GroupConcatColumn(const uint32_t sessionID, bool isJsonArrayAgg = false);
GroupConcatColumn(const GroupConcatColumn& rhs, const uint32_t sessionID = 0);
/**
* Destructors
*/
~GroupConcatColumn() override;
~GroupConcatColumn() override = default;
/**
* Overloaded stream operator
@ -140,6 +140,7 @@ class GroupConcatColumn : public AggregateColumn
private:
std::vector<SRCP> fOrderCols;
std::string fSeparator;
bool fIsJsonArrayAgg{false};
};
/**

View File

@ -1,180 +0,0 @@
/* Copyright (C) 2022 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
using namespace std;
#include "bytestream.h"
using namespace messageqcpp;
#include "rowgroup.h"
using namespace rowgroup;
#include "joblisttypes.h"
using namespace joblist;
#include "simplefilter.h"
#include "constantfilter.h"
#include "arithmeticcolumn.h"
#include "functioncolumn.h"
#include "objectreader.h"
#include "jsonarrayaggcolumn.h"
namespace execplan
{
/**
* Constructors/Destructors
*/
// Default construction: nothing beyond the default-constructed AggregateColumn base.
JsonArrayAggColumn::JsonArrayAggColumn()
 : AggregateColumn()
{
}

// Session-bound construction: the session id is handed straight to AggregateColumn.
JsonArrayAggColumn::JsonArrayAggColumn(const uint32_t sessionID)
 : AggregateColumn(sessionID)
{
}

// Copy construction: copies the AggregateColumn part of rhs together with its
// ORDER BY column list and separator.
// NOTE(review): sessionID is accepted but not used by this constructor.
JsonArrayAggColumn::JsonArrayAggColumn(const JsonArrayAggColumn& rhs, const uint32_t sessionID)
 : AggregateColumn(static_cast<const AggregateColumn&>(rhs))
 , fOrderCols(rhs.fOrderCols)
 , fSeparator(rhs.fSeparator)
{
}

// No resources are released explicitly here; members clean up after themselves.
JsonArrayAggColumn::~JsonArrayAggColumn() = default;
/**
* Methods
*/
// Builds a printable description of this column: a header line carrying
// data(), the AggregateColumn description, and then every ORDER BY column
// streamed in list order.
const string JsonArrayAggColumn::toString() const
{
  ostringstream text;
  text << "JsonArrayAggColumn " << data() << endl;
  text << AggregateColumn::toString() << endl;
  text << "Json Array Order Columns: " << endl;

  for (const auto& orderCol : fOrderCols)
    text << *orderCol;

  return text.str();
}
// Streams the toString() rendering of rhs and returns the stream for chaining.
ostream& operator<<(ostream& output, const JsonArrayAggColumn& rhs)
{
  return output << rhs.toString();
}
// Writes this column to the byte stream in the order unserialize() reads it:
// type marker, AggregateColumn payload, order-column count, each order
// column, and finally the separator.
void JsonArrayAggColumn::serialize(messageqcpp::ByteStream& b) const
{
  // The marker written is the GROUPCONCATCOLUMN id, which is also the one
  // unserialize() checks for.
  b << static_cast<uint8_t>(ObjectReader::GROUPCONCATCOLUMN);
  AggregateColumn::serialize(b);

  // Order columns are prefixed with their count as a 32-bit value.
  b << static_cast<uint32_t>(fOrderCols.size());
  for (const auto& orderCol : fOrderCols)
    orderCol->serialize(b);

  b << fSeparator;
}
// Restores this column from the byte stream: verifies the GROUPCONCATCOLUMN
// type marker, reads the AggregateColumn payload, rebuilds the order-column
// list from its 32-bit count, and reads the separator last.
void JsonArrayAggColumn::unserialize(messageqcpp::ByteStream& b)
{
  ObjectReader::checkType(b, ObjectReader::GROUPCONCATCOLUMN);
  AggregateColumn::unserialize(b);

  // Drop whatever order columns were held before reading the new ones.
  fOrderCols.clear();

  uint32_t orderColCount;
  b >> orderColCount;

  for (uint32_t idx = 0; idx < orderColCount; ++idx)
  {
    // NOTE(review): if the created node is not a ReturnedColumn the cast
    // yields a null pointer, and a null entry is stored in the list.
    ReturnedColumn* col = dynamic_cast<ReturnedColumn*>(ObjectReader::createTreeNode(b));
    fOrderCols.push_back(SRCP(col));
  }

  b >> fSeparator;
}
// Deep, strict equality test against another JsonArrayAggColumn.
//
// Two columns are equal when
//   * their AggregateColumn parts compare equal,
//   * they hold the same number of ORDER BY columns and each pair of entries
//     is equal (two null entries count as equal, null vs non-null does not),
//   * their separators are identical.
//
// @param t column to compare with
// @return true when every compared member matches, false otherwise
bool JsonArrayAggColumn::operator==(const JsonArrayAggColumn& t) const
{
  const AggregateColumn* lhsBase = static_cast<const AggregateColumn*>(this);
  const AggregateColumn* rhsBase = static_cast<const AggregateColumn*>(&t);

  if (*lhsBase != *rhsBase)
    return false;

  // Lists of different length can never be equal. Checking this up front
  // also keeps the loop below from indexing past the end of t.fOrderCols.
  if (fOrderCols.size() != t.fOrderCols.size())
    return false;

  for (uint32_t i = 0; i < fOrderCols.size(); i++)
  {
    auto* mine = fOrderCols[i].get();
    auto* theirs = t.fOrderCols[i].get();

    if (mine == nullptr || theirs == nullptr)
    {
      // Equal only when both entries are null.
      if (mine != theirs)
        return false;

      continue;
    }

    // Pointer-taking comparison, same call as before the fix.
    if (*mine != theirs)
      return false;
  }

  // The separator is part of the object's state and must match as well.
  return fSeparator == t.fSeparator;
}
// Equality against an arbitrary tree node: false unless t is actually a
// JsonArrayAggColumn, in which case the typed comparison decides.
bool JsonArrayAggColumn::operator==(const TreeNode* t) const
{
  const auto* other = dynamic_cast<const JsonArrayAggColumn*>(t);
  return other != nullptr && *this == *other;
}
// Inequality is defined as the exact negation of the typed operator==.
bool JsonArrayAggColumn::operator!=(const JsonArrayAggColumn& t) const
{
  const bool same = (*this == t);
  return !same;
}
// Inequality against a tree node pointer: the negation of operator==(const TreeNode*).
bool JsonArrayAggColumn::operator!=(const TreeNode* t) const
{
  const bool same = (*this == t);
  return !same;
}
// Produces the C++ expression that constructs this column for its session id
// and records the header that expression needs in `includes`.
string JsonArrayAggColumn::toCppCode(IncludeSet& includes) const
{
  includes.insert("jsonarrayaggcolumn.h");

  stringstream code;
  code << "JsonArrayAggColumn(" << sessionID() << ")";
  return code.str();
}
} // namespace execplan

View File

@ -1,145 +0,0 @@
/* Copyright (C) 2022 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/** @file */
#pragma once
#include <string>
#include "calpontselectexecutionplan.h"
#include "aggregatecolumn.h"
namespace messageqcpp
{
class ByteStream;
}
/**
* Namespace
*/
namespace execplan
{
/**
* @brief A class to represent a JSON_ARRAYAGG aggregate return column
*
* This class is a specialization of class AggregateColumn that additionally
* carries a list of ORDER BY columns and a separator string.
*/
class JsonArrayAggColumn : public AggregateColumn
{
public:
/**
* Constructors
*
* The copy-style constructor takes an optional session id (default 0).
*/
JsonArrayAggColumn();
explicit JsonArrayAggColumn(const uint32_t sessionID);
JsonArrayAggColumn(const JsonArrayAggColumn& rhs, const uint32_t sessionID = 0);
/**
* Destructors
*/
~JsonArrayAggColumn() override;
/**
* Return a textual description of this column
*/
const std::string toString() const override;
/** return a copy of this object
*
* Allocates a new JsonArrayAggColumn with the copy constructor and returns
* the raw pointer to it.
* NOTE(review): the caller presumably takes ownership of the result - confirm.
*/
JsonArrayAggColumn* clone() const override
{
return new JsonArrayAggColumn(*this);
}
/**
* Accessors and Mutators
*
* The one-argument overloads replace the stored value; the zero-argument
* overloads return a mutable reference to the stored member.
*/
void orderCols(const std::vector<SRCP>& orderCols)
{
fOrderCols = orderCols;
}
std::vector<SRCP>& orderCols()
{
return fOrderCols;
}
void separator(const std::string& separator)
{
fSeparator = separator;
}
std::string& separator()
{
return fSeparator;
}
/**
* Serialize interface
*
* Write this column to / read this column from a ByteStream.
*/
void serialize(messageqcpp::ByteStream&) const override;
void unserialize(messageqcpp::ByteStream&) override;
/** @brief Do a deep, strict (as opposed to semantic) equivalence test
*
* Compares this column with an arbitrary tree node given by pointer.
* @return true iff t is a JsonArrayAggColumn that compares equal to this;
* false otherwise
*/
bool operator==(const TreeNode* t) const override;
/** @brief Do a deep, strict (as opposed to semantic) equivalence test
*
* Typed comparison with another JsonArrayAggColumn. The using-declaration
* keeps the AggregateColumn overloads of operator== visible in this class.
* @return true iff t is considered a duplicate copy of this; false otherwise
*/
using AggregateColumn::operator==;
virtual bool operator==(const JsonArrayAggColumn& t) const;
/** @brief Negation of operator==(const TreeNode*)
*
* @return false iff t compares equal to this; true otherwise
*/
bool operator!=(const TreeNode* t) const override;
/** @brief Negation of operator==(const JsonArrayAggColumn&)
*
* The using-declaration keeps the AggregateColumn overloads of operator!=
* visible in this class.
* @return false iff t compares equal to this; true otherwise
*/
using AggregateColumn::operator!=;
virtual bool operator!=(const JsonArrayAggColumn& t) const;
/** @brief Return a C++ expression constructing this column
*
* @param includes set that receives the header name the expression needs
*/
std::string toCppCode(IncludeSet& includes) const override;
private:
// ORDER BY columns attached to the aggregate
std::vector<SRCP> fOrderCols;
// separator string attached to the aggregate
std::string fSeparator;
};
/**
* stream operator
*/
std::ostream& operator<<(std::ostream& os, const JsonArrayAggColumn& rhs);
} // namespace execplan

View File

@ -25,7 +25,6 @@ set(joblist_LIB_SRCS
joblistfactory.cpp
jobstep.cpp
jobstepassociation.cpp
jsonarrayagg.cpp
lbidlist.cpp
limitedorderby.cpp
passthrucommand-jl.cpp

File diff suppressed because it is too large Load Diff

View File

@ -26,12 +26,11 @@
#include <vector>
#include <boost/scoped_ptr.hpp>
#include "returnedcolumn.h" // SRCP
#include "rowgroup.h" // RowGroup
#include "rowaggregation.h" // SP_GroupConcat
#include "limitedorderby.h" // IdbOrderBy
#define EXPORT
#include "groupconcatcolumn.h" // GroupConcatColumn
#include "returnedcolumn.h" // SRCP
#include "rowgroup.h" // RowGroup
#include "rowaggregation.h" // SP_GroupConcat
#include "limitedorderby.h" // IdbOrderBy
namespace joblist
{
@ -44,10 +43,10 @@ class GroupConcatInfo
{
public:
GroupConcatInfo();
virtual ~GroupConcatInfo();
~GroupConcatInfo();
void prepGroupConcat(JobInfo&);
virtual void mapColumns(const rowgroup::RowGroup&);
void mapColumns(const rowgroup::RowGroup&);
std::set<uint32_t>& columns()
{
@ -58,50 +57,66 @@ class GroupConcatInfo
return fGroupConcat;
}
virtual const std::string toString() const;
const std::string toString() const;
protected:
virtual uint32_t getColumnKey(const execplan::SRCP& srcp, JobInfo& jobInfo);
virtual std::shared_ptr<int[]> makeMapping(const rowgroup::RowGroup&, const rowgroup::RowGroup&);
uint32_t getColumnKey(const execplan::SRCP& srcp, JobInfo& jobInfo) const;
std::shared_ptr<int[]> makeMapping(const rowgroup::RowGroup&, const rowgroup::RowGroup&) const;
std::set<uint32_t> fColumns;
std::vector<rowgroup::SP_GroupConcat> fGroupConcat;
};
class GroupConcatAgUM : public rowgroup::GroupConcatAg
class GroupConcatAg
{
public:
EXPORT explicit GroupConcatAgUM(rowgroup::SP_GroupConcat&);
EXPORT ~GroupConcatAgUM() override;
explicit GroupConcatAg(rowgroup::SP_GroupConcat&, bool isJsonArrayAgg = false);
~GroupConcatAg();
using rowgroup::GroupConcatAg::merge;
void initialize() override;
void processRow(const rowgroup::Row&) override;
EXPORT virtual void merge(const rowgroup::Row&, int64_t);
void initialize();
void processRow(const rowgroup::Row&);
void merge(const rowgroup::Row&, uint64_t);
boost::scoped_ptr<GroupConcator>& concator()
{
return fConcator;
}
EXPORT uint8_t* getResult() override;
uint8_t* getResult();
uint32_t getGroupConcatId() const
{
return fGroupConcat->id;
}
void serialize(messageqcpp::ByteStream& bs) const;
void deserialize(messageqcpp::ByteStream& bs);
rowgroup::RGDataSizeType getDataSize() const;
protected:
virtual void applyMapping(const std::shared_ptr<int[]>&, const rowgroup::Row&);
void applyMapping(const std::shared_ptr<int[]>&, const rowgroup::Row&);
rowgroup::SP_GroupConcat fGroupConcat;
bool fIsJsonArrayAgg{false};
boost::scoped_ptr<GroupConcator> fConcator;
boost::scoped_array<uint8_t> fData;
rowgroup::Row fRow;
rowgroup::RGData fRowRGData;
rowgroup::RowGroup fRowGroup;
bool fNoOrder;
rowgroup::RGDataSizeType fMemSize{0};
};
using SP_GroupConcatAg = boost::shared_ptr<GroupConcatAg>;
// GROUP_CONCAT base
class GroupConcator
{
public:
GroupConcator();
virtual ~GroupConcator();
explicit GroupConcator(bool isJsonArrayAgg) : fIsJsonArrayAgg(isJsonArrayAgg)
{
}
virtual ~GroupConcator() = default;
virtual void initialize(const rowgroup::SP_GroupConcat&);
virtual void processRow(const rowgroup::Row&) = 0;
@ -113,6 +128,10 @@ class GroupConcator
virtual const std::string toString() const;
virtual void serialize(messageqcpp::ByteStream&) const;
virtual void deserialize(messageqcpp::ByteStream&);
virtual rowgroup::RGDataSizeType getDataSize() const = 0;
protected:
virtual bool concatColIsNull(const rowgroup::Row&);
virtual void outputRow(std::ostringstream&, const rowgroup::Row&);
@ -120,18 +139,24 @@ class GroupConcator
std::vector<uint32_t> fConcatColumns;
std::vector<std::pair<utils::NullString, uint32_t> > fConstCols;
int64_t fCurrentLength;
int64_t fGroupConcatLen;
int64_t fConstantLen;
int64_t fCurrentLength{0};
int64_t fGroupConcatLen{0};
int64_t fConstantLen{0};
std::unique_ptr<std::string> outputBuf_;
long fTimeZone;
long fTimeZone{0};
bool fIsJsonArrayAgg{false};
joblist::ResourceManager* fRm{nullptr};
boost::shared_ptr<int64_t> fSessionMemLimit;
};
// For GROUP_CONCAT without distinct or orderby
class GroupConcatNoOrder : public GroupConcator
{
public:
GroupConcatNoOrder();
explicit GroupConcatNoOrder(bool isJsonArrayAgg) : GroupConcator(isJsonArrayAgg)
{
}
~GroupConcatNoOrder() override;
void initialize(const rowgroup::SP_GroupConcat&) override;
@ -142,32 +167,45 @@ class GroupConcatNoOrder : public GroupConcator
uint8_t* getResultImpl(const std::string& sep) override;
// uint8_t* getResult(const std::string& sep);
void serialize(messageqcpp::ByteStream&) const override;
void deserialize(messageqcpp::ByteStream&) override;
rowgroup::RGDataSizeType getDataSize() const override
{
return fMemSize;
}
const std::string toString() const override;
protected:
std::vector<rowgroup::RGDataUnPtr>& getRGDatas() { return fDataVec; }
void createNewRGData();
rowgroup::RowGroup fRowGroup;
rowgroup::Row fRow;
rowgroup::RGData fData;
std::queue<rowgroup::RGData> fDataQueue;
uint64_t fRowsPerRG;
uint64_t fErrorCode;
uint64_t fMemSize;
ResourceManager* fRm;
boost::shared_ptr<int64_t> fSessionMemLimit;
std::vector<rowgroup::RGDataUnPtr> fDataVec;
uint64_t fRowsPerRG{128};
rowgroup::RGDataSizeType fMemSize{0};
rowgroup::RGDataSizeType fCurMemSize{0};
};
// ORDER BY used in GROUP_CONCAT class
// This version is for GROUP_CONCAT, the size is limited by the group_concat_max_len.
class GroupConcatOrderBy : public GroupConcator, public ordering::IdbOrderBy
class GroupConcatOrderBy : public GroupConcator, public ordering::IdbCompare
{
public:
GroupConcatOrderBy();
explicit GroupConcatOrderBy(bool isJsonArrayAgg);
~GroupConcatOrderBy() override;
using ordering::IdbOrderBy::initialize;
using ordering::IdbCompare::initialize;
void initialize(const rowgroup::SP_GroupConcat&) override;
void processRow(const rowgroup::Row&) override;
uint64_t getKeyLength() const override;
uint64_t getKeyLength() const;
void serialize(messageqcpp::ByteStream&) const override;
void deserialize(messageqcpp::ByteStream&) override;
rowgroup::RGDataSizeType getDataSize() const override;
void merge(GroupConcator*) override;
using GroupConcator::getResult;
@ -177,8 +215,52 @@ class GroupConcatOrderBy : public GroupConcator, public ordering::IdbOrderBy
const std::string toString() const override;
protected:
// Hash functor used as the hasher of DistinctMap, whose keys are
// rowgroup::Row::Pointer values. It keeps a back-pointer to the owning
// GroupConcatOrderBy and a column count; operator() is only declared here.
// NOTE(review): colCount presumably limits how many columns take part in
// the hash - confirm against the definition.
struct Hasher
{
GroupConcatOrderBy* ts;
utils::Hasher_r h;
uint32_t colCount;
// t: owning GroupConcatOrderBy, c: column count
Hasher(GroupConcatOrderBy* t, uint32_t c) : ts(t), colCount(c)
{
}
uint64_t operator()(const rowgroup::Row::Pointer&) const;
};
// Equality functor used as the key-equality predicate of DistinctMap, whose
// keys are rowgroup::Row::Pointer values. It keeps a back-pointer to the
// owning GroupConcatOrderBy and a column count; operator() is only declared
// here.
// NOTE(review): colCount presumably limits how many columns are compared -
// confirm against the definition.
struct Eq
{
GroupConcatOrderBy* ts;
uint32_t colCount;
// t: owning GroupConcatOrderBy, c: column count
Eq(GroupConcatOrderBy* t, uint32_t c) : ts(t), colCount(c)
{
}
bool operator()(const rowgroup::Row::Pointer&, const rowgroup::Row::Pointer&) const;
};
using DistinctMap = std::unordered_map<rowgroup::Row::Pointer, uint64_t, Hasher, Eq>;
class SortingPQ;
protected:
void createNewRGData();
uint64_t getCurrentRowIdx() const;
static uint64_t shiftGroupIdxBy(uint64_t idx, uint32_t shift);
std::vector<rowgroup::RGDataUnPtr>& getRGDatas() { return fDataVec; }
SortingPQ* getQueue() { return fOrderByQueue.get(); }
rowgroup::RGDataSizeType fMemSize{0};
static constexpr uint64_t fRowsPerRG{128};
std::vector<ordering::IdbSortSpec> fOrderByCond;
rowgroup::Row fRow0;
rowgroup::Row row1, row2;
ordering::CompareRule fRule;
std::vector<rowgroup::RGDataUnPtr> fDataVec;
bool fDistinct;
std::unique_ptr<DistinctMap> fDistinctMap;
std::unique_ptr<SortingPQ> fOrderByQueue;
};
} // namespace joblist
#undef EXPORT

View File

@ -43,7 +43,6 @@
#include "joblist.h"
#include "jobstep.h"
#include "groupconcat.h"
#include "jsonarrayagg.h"
#include "jl_logger.h"
#include "resourcemanager.h"

File diff suppressed because it is too large Load Diff

View File

@ -1,139 +0,0 @@
/* Copyright (C) 2022 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/** @file */
#pragma once
#include <utility>
#include <set>
#include <vector>
#include <boost/scoped_ptr.hpp>
#include "groupconcat.h"
#define EXPORT
namespace joblist
{
// forward reference
class JsonArrayAggregator;
class ResourceManager;
// JSON_ARRAYAGG variant of GroupConcatInfo: it overrides the column mapping,
// column key lookup, mapping construction and toString hooks of the base and
// adds its own preparation entry point.
class JsonArrayInfo : public GroupConcatInfo
{
public:
// NOTE(review): presumably the JSON_ARRAYAGG counterpart of
// GroupConcatInfo::prepGroupConcat - the definition is not visible here.
void prepJsonArray(JobInfo&);
void mapColumns(const rowgroup::RowGroup&) override;
const std::string toString() const override;
protected:
uint32_t getColumnKey(const execplan::SRCP& srcp, JobInfo& jobInfo) override;
std::shared_ptr<int[]> makeMapping(const rowgroup::RowGroup&, const rowgroup::RowGroup&) override;
};
// JSON_ARRAYAGG variant of GroupConcatAgUM. It overrides initialize,
// processRow, merge(Row, int64_t), getResult() and applyMapping, and adds a
// getResult overload taking a uint8_t* argument.
// EXPORT is defined as an empty macro in this header.
class JsonArrayAggregatAgUM : public GroupConcatAgUM
{
public:
EXPORT explicit JsonArrayAggregatAgUM(rowgroup::SP_GroupConcat&);
EXPORT ~JsonArrayAggregatAgUM() override;
// keep the rowgroup::GroupConcatAg::merge overloads visible next to the override below
using rowgroup::GroupConcatAg::merge;
void initialize() override;
void processRow(const rowgroup::Row&) override;
EXPORT void merge(const rowgroup::Row&, int64_t) override;
// NOTE(review): presumably stores the result through the given pointer - confirm in the definition
EXPORT void getResult(uint8_t*);
EXPORT uint8_t* getResult() override;
protected:
void applyMapping(const std::shared_ptr<int[]>&, const rowgroup::Row&) override;
};
// JSON_ARRAYAGG base
// Abstract specialization of GroupConcator: processRow stays pure virtual,
// while initialize, toString, concatColIsNull, outputRow and lengthEstimate
// are overridden here.
class JsonArrayAggregator : public GroupConcator
{
public:
JsonArrayAggregator();
~JsonArrayAggregator() override;
void initialize(const rowgroup::SP_GroupConcat&) override;
// must be provided by the concrete subclasses
void processRow(const rowgroup::Row&) override = 0;
const std::string toString() const override;
protected:
bool concatColIsNull(const rowgroup::Row&) override;
void outputRow(std::ostringstream&, const rowgroup::Row&) override;
int64_t lengthEstimate(const rowgroup::Row&) override;
};
// For JSON_ARRAYAGG without distinct or orderby
// Concrete JsonArrayAggregator that implements processRow, merge and
// getResultImpl.
class JsonArrayAggNoOrder : public JsonArrayAggregator
{
public:
JsonArrayAggNoOrder();
~JsonArrayAggNoOrder() override;
void initialize(const rowgroup::SP_GroupConcat&) override;
void processRow(const rowgroup::Row&) override;
// keep the GroupConcator::merge overloads visible next to the override below
using GroupConcator::merge;
void merge(GroupConcator*) override;
// keep the GroupConcator::getResult overloads visible
using GroupConcator::getResult;
uint8_t* getResultImpl(const std::string& sep) override;
const std::string toString() const override;
protected:
// NOTE(review): the roles noted below are inferred from the member names and
// types only - confirm against the implementation file.
rowgroup::RowGroup fRowGroup;
rowgroup::Row fRow;
// presumably the RGData currently being filled
rowgroup::RGData fData;
// presumably the RGData buffers already filled
std::queue<rowgroup::RGData> fDataQueue;
uint64_t fRowsPerRG;
uint64_t fErrorCode;
uint64_t fMemSize;
ResourceManager* fRm;
boost::shared_ptr<int64_t> fSessionMemLimit;
};
// ORDER BY used in JSON_ARRAYAGG class
// Concrete JsonArrayAggregator that also derives from ordering::IdbOrderBy;
// it implements processRow, getKeyLength, merge and getResultImpl.
class JsonArrayAggOrderBy : public JsonArrayAggregator, public ordering::IdbOrderBy
{
public:
JsonArrayAggOrderBy();
~JsonArrayAggOrderBy() override;
// keep the ordering::IdbOrderBy::initialize overloads visible next to the override below
using ordering::IdbOrderBy::initialize;
void initialize(const rowgroup::SP_GroupConcat&) override;
void processRow(const rowgroup::Row&) override;
uint64_t getKeyLength() const override;
// keep the GroupConcator::merge overloads visible next to the override below
using GroupConcator::merge;
void merge(GroupConcator*) override;
// keep the GroupConcator::getResult overloads visible
using GroupConcator::getResult;
uint8_t* getResultImpl(const std::string& sep) override;
const std::string toString() const override;
// no protected members are declared
protected:
};
} // namespace joblist
#undef EXPORT

View File

@ -237,9 +237,6 @@ ResourceManager::ResourceManager(bool runningInExeMgr, config::Config* aConfig)
else
fUseHdfs = false;
fAllowedDiskAggregation =
getBoolVal(fRowAggregationStr, "AllowDiskBasedAggregation", defaultAllowDiskAggregation);
if (!load_encryption_keys())
{
Logger log;
@ -390,4 +387,9 @@ bool ResourceManager::getMemory(int64_t amount, bool patience)
return ret1;
}
// Reports whether disk-based aggregation is allowed. The answer is obtained
// from getBoolVal() on every call, passing fRowAggregationStr, the
// "AllowDiskBasedAggregation" name and defaultAllowDiskAggregation.
bool ResourceManager::getAllowDiskAggregation() const
{
  const bool allowed = getBoolVal(fRowAggregationStr, "AllowDiskBasedAggregation", defaultAllowDiskAggregation);
  return allowed;
}
} // namespace joblist

View File

@ -156,10 +156,7 @@ class ResourceManager
return getIntVal(fExeMgrStr, "ExecQueueSize", defaultEMExecQueueSize);
}
bool getAllowDiskAggregation() const
{
return fAllowedDiskAggregation;
}
bool getAllowDiskAggregation() const;
uint64_t getDECConnectionsPerQuery() const
{
@ -528,7 +525,6 @@ class ResourceManager
bool isExeMgr;
bool fUseHdfs;
bool fAllowedDiskAggregation{false};
uint64_t fDECConnectionsPerQuery;
};

View File

@ -5632,7 +5632,7 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID)
{
handleException(std::current_exception(), logging::tupleAggregateStepErr,
logging::ERR_AGGREGATION_TOO_BIG,
"TupleAggregateStep::threadedAggregateRowGroups()[" + std::to_string(threadID) + "]");
"TupleAggregateStep::threadedAggregateRowGroups()");
fEndOfResult = true;
fDoneAggregate = true;
}

View File

@ -71,7 +71,6 @@ using namespace cal_impl_if;
#include "functioncolumn.h"
#include "groupconcatcolumn.h"
#include "intervalcolumn.h"
#include "jsonarrayaggcolumn.h"
#include "logicoperator.h"
#include "outerjoinonfilter.h"
#include "predicateoperator.h"
@ -96,7 +95,6 @@ const uint64_t SUB_BIT = 0x02;
const uint64_t AF_BIT = 0x04;
const uint64_t CORRELATED = 0x08;
// In certain cases, gp_walk is called recursively. When done so,
// we need to bookmark the rcWorkStack for those cases where a constant
// expression such as 1=1 is used in an if statement or function call.
@ -167,7 +165,7 @@ bool itemDisablesWrapping(Item* item, gp_walk_info& gwi);
void pushReturnedCol(gp_walk_info& gwi, Item* from, SRCP rc)
{
uint32_t i;
for ( i = 0; i < gwi.processed.size(); i++)
for (i = 0; i < gwi.processed.size(); i++)
{
Item* ith = gwi.processed[i].first;
@ -352,7 +350,8 @@ cal_impl_if::gp_walk_info::~gp_walk_info()
delete ptWorkStack.top();
ptWorkStack.pop();
}
for (uint32_t i=0;i<viewList.size();i++) {
for (uint32_t i = 0; i < viewList.size(); i++)
{
delete viewList[i];
}
viewList.clear();
@ -395,7 +394,8 @@ void clearDeleteStacks(gp_walk_info& gwi)
delete gwi.ptWorkStack.top();
gwi.ptWorkStack.pop();
}
for (uint32_t i=0;i<gwi.viewList.size();i++) {
for (uint32_t i = 0; i < gwi.viewList.size(); i++)
{
delete gwi.viewList[i];
}
gwi.viewList.clear();
@ -585,7 +585,8 @@ bool sortItemIsInGrouping(Item* sort_item, ORDER* groupcol)
const Item_ref* ref_item = static_cast<const Item_ref*>(item);
item = (Item*)*ref_item->ref;
}
if (item->type() == Item::FIELD_ITEM || item->type() == Item::CONST_ITEM || item->type() == Item::NULL_ITEM)
if (item->type() == Item::FIELD_ITEM || item->type() == Item::CONST_ITEM ||
item->type() == Item::NULL_ITEM)
{
return true;
}
@ -1610,7 +1611,6 @@ uint32_t buildJoin(gp_walk_info& gwi, List<TABLE_LIST>& join_list,
ParseTree* pt = new ParseTree(onFilter);
outerJoinStack.push(pt);
}
}
else // inner join
{
@ -1706,7 +1706,7 @@ bool buildRowColumnFilter(gp_walk_info* gwip, RowColumn* rhs, RowColumn* lhs, It
// two entries have been popped from the stack already: lhs and rhs
stack<ReturnedColumn*> tmpStack;
vector<RowColumn*> valVec;
vector<SRCP> heldOutVals; // these vals are not rhs/lhs and need to be freed
vector<SRCP> heldOutVals; // these vals are not rhs/lhs and need to be freed
tmpStack.push(rhs);
tmpStack.push(lhs);
assert(gwip->rcWorkStack.size() >= ifp->argument_count() - 2);
@ -2116,7 +2116,7 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip)
sop.reset(new PredicateOperator(eqop));
SRCP scsp = gwip->scsp;
idbassert(scsp.get() != nullptr);
//sop->setOpType(gwip->scsp->resultType(), rhs->resultType());
// sop->setOpType(gwip->scsp->resultType(), rhs->resultType());
sop->setOpType(scsp->resultType(), rhs->resultType());
ConstantFilter* cf = 0;
@ -3425,8 +3425,9 @@ ReturnedColumn* wrapIntoAggregate(ReturnedColumn* rc, gp_walk_info& gwi, Item* b
ac->charsetNumber(rc->charsetNumber());
ac->orderPos(rc->orderPos());
uint32_t i;
for(i=0; i < gwi.processed.size() && !gwi.processed[i].first->eq(baseItem, false);i++)
{ }
for (i = 0; i < gwi.processed.size() && !gwi.processed[i].first->eq(baseItem, false); i++)
{
}
if (i < gwi.processed.size())
{
ac->expressionId(gwi.processed[i].second);
@ -3441,7 +3442,6 @@ ReturnedColumn* wrapIntoAggregate(ReturnedColumn* rc, gp_walk_info& gwi, Item* b
return ac;
}
ReturnedColumn* buildReturnedColumnNull(gp_walk_info& gwi)
{
if (gwi.condPush)
@ -3875,7 +3875,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
// Could have it set if there are aggregation funcs as this function arguments.
gwi.fatalParseError = false;
//ReturnedColumn* rc = buildAggFrmTempField(sfitempp[0], gwi);
// ReturnedColumn* rc = buildAggFrmTempField(sfitempp[0], gwi);
ReturnedColumn* rc = buildReturnedColumn(sfitempp[0], gwi, nonSupport);
if (rc)
lhs = new ParseTree(rc);
@ -3895,7 +3895,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
// Could have it set if there are aggregation funcs as this function arguments.
gwi.fatalParseError = false;
//ReturnedColumn* rc = buildAggFrmTempField(sfitempp[1], gwi);
// ReturnedColumn* rc = buildAggFrmTempField(sfitempp[1], gwi);
ReturnedColumn* rc = buildReturnedColumn(sfitempp[1], gwi, nonSupport);
if (rc)
rhs = new ParseTree(rc);
@ -4019,8 +4019,8 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
int32_t leftColWidth = leftColType.colWidth;
int32_t rightColWidth = rightColType.colWidth;
if ((leftColWidth == datatypes::MAXDECIMALWIDTH || rightColWidth == datatypes::MAXDECIMALWIDTH)
&& datatypes::isDecimal(mysqlType.colDataType))
if ((leftColWidth == datatypes::MAXDECIMALWIDTH || rightColWidth == datatypes::MAXDECIMALWIDTH) &&
datatypes::isDecimal(mysqlType.colDataType))
{
mysqlType.colWidth = datatypes::MAXDECIMALWIDTH;
@ -4078,7 +4078,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo
strcasecmp(ac->alias().c_str(), gwi.returnedCols[i]->alias().c_str()) == 0)
{
ac->expressionId(gwi.returnedCols[i]->expressionId());
isOnSelectList = true;
isOnSelectList = true;
break;
}
}
@ -4114,7 +4114,8 @@ ReturnedColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool&
return rc;
}
ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport, bool selectBetweenIn)
ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport,
bool selectBetweenIn)
{
if (get_fe_conn_info_ptr() == NULL)
{
@ -4125,7 +4126,7 @@ ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool&
cal_connection_info* ci = static_cast<cal_connection_info*>(get_fe_conn_info_ptr());
string funcName = ifp->func_name();
if ( nullptr != dynamic_cast<Item_func_concat_operator_oracle*>(ifp))
if (nullptr != dynamic_cast<Item_func_concat_operator_oracle*>(ifp))
{
// the condition above is the only way to recognize this particular case.
funcName = "concat_operator_oracle";
@ -4356,9 +4357,9 @@ ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool&
if (mayHasBoolArg && isBoolType)
rc = buildBooleanConstantColumn(ifp->arguments()[i], gwi, nonSupport);
else
{
{
rc = buildReturnedColumn(ifp->arguments()[i], gwi, nonSupport);
}
}
// MCOL-1510 It must be a temp table field, so find the corresponding column.
if (!rc && ifp->arguments()[i]->type() == Item::REF_ITEM)
@ -5175,7 +5176,7 @@ void analyzeForImplicitGroupBy(Item* item, gp_walk_info& gwi)
if (item->type() == Item::FUNC_ITEM)
{
Item_func* ifp = static_cast<Item_func*>(item);
for(uint32_t i = 0;i<ifp->argument_count() && !gwi.implicitExplicitGroupBy;i++)
for (uint32_t i = 0; i < ifp->argument_count() && !gwi.implicitExplicitGroupBy; i++)
{
analyzeForImplicitGroupBy(ifp->arguments()[i], gwi);
}
@ -5224,7 +5225,7 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi)
}
else if (isp->sum_func() == Item_sum::JSON_ARRAYAGG_FUNC)
{
ac = new JsonArrayAggColumn(gwi.sessionid);
ac = new GroupConcatColumn(gwi.sessionid, true);
}
else if (isp->sum_func() == Item_sum::UDF_SUM_FUNC)
{
@ -5402,7 +5403,7 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi)
}
rowCol->columnVec(selCols);
(dynamic_cast<JsonArrayAggColumn*>(ac))->orderCols(orderCols);
(dynamic_cast<GroupConcatColumn*>(ac))->orderCols(orderCols);
parm.reset(rowCol);
ac->aggParms().push_back(parm);
@ -5410,7 +5411,7 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi)
{
string separator;
separator.assign(gc->get_separator()->ptr(), gc->get_separator()->length());
(dynamic_cast<JsonArrayAggColumn*>(ac))->separator(separator);
(dynamic_cast<GroupConcatColumn*>(ac))->separator(separator);
}
}
else if (isSupportedAggregateWithOneConstArg(isp, sfitempp))
@ -5482,21 +5483,23 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi)
{
//@bug5229. handle constant function on aggregate argument
ac->constCol(SRCP(rc));
// XXX: this skips restoration of clauseType.
// XXX: this skips restoration of clauseType.
break;
}
// the "rc" can be in gwi.no_parm_func_list. erase it from that list and
// then delete it.
// kludge, I know.
uint32_t i;
// the "rc" can be in gwi.no_parm_func_list. erase it from that list and
// then delete it.
// kludge, I know.
uint32_t i;
for (i = 0; gwi.no_parm_func_list[i] != rc && i < gwi.no_parm_func_list.size(); i++) { }
// Locate rc in gwi.no_parm_func_list; i ends up equal to size() when it is absent.
// The bound is tested first so that the element access is short-circuited
// and the list is never indexed at or past its end.
for (i = 0; i < gwi.no_parm_func_list.size() && gwi.no_parm_func_list[i] != rc; i++)
{
}
if (i < gwi.no_parm_func_list.size())
{
if (i < gwi.no_parm_func_list.size())
{
gwi.no_parm_func_list.erase(gwi.no_parm_func_list.begin() + i);
delete rc;
}
}
}
}
@ -6003,7 +6006,7 @@ void gp_walk(const Item* item, void* arg)
if (ifp)
{
// XXX: this looks awfuly wrong.
// XXX: this looks awfully wrong.
SimpleColumn* scp = buildSimpleColumn(ifp, *gwip);
if (!scp)
@ -6012,7 +6015,7 @@ void gp_walk(const Item* item, void* arg)
string aliasTableName(scp->tableAlias());
scp->tableAlias(aliasTableName);
gwip->rcWorkStack.push(scp->clone());
boost::shared_ptr<SimpleColumn> scsp(scp);
boost::shared_ptr<SimpleColumn> scsp(scp);
gwip->scsp = scsp;
gwip->funcName.clear();
@ -6551,7 +6554,7 @@ void gp_walk(const Item* item, void* arg)
}
else if (col->type() == Item::FIELD_ITEM && gwip->clauseType == HAVING)
{
//ReturnedColumn* rc = buildAggFrmTempField(const_cast<Item*>(item), *gwip);
// ReturnedColumn* rc = buildAggFrmTempField(const_cast<Item*>(item), *gwip);
ReturnedColumn* rc = buildReturnedColumn(const_cast<Item*>(item), *gwip, gwip->fatalParseError);
if (rc)
gwip->rcWorkStack.push(rc);
@ -6566,7 +6569,7 @@ void gp_walk(const Item* item, void* arg)
SimpleColumn* thisSC = dynamic_cast<SimpleColumn*>(rc);
if (thisSC)
{
gwip->scsp.reset(thisSC->clone());
gwip->scsp.reset(thisSC->clone());
}
if (!rc && !cando)
{
@ -6785,14 +6788,14 @@ void parse_item(Item* item, vector<Item_field*>& field_vec, bool& hasNonSupportI
// MCOL-1510. This could be a non-supported function
// argument in form of a temp_table_field, so check
// and set hasNonSupportItem if it is so.
//ReturnedColumn* rc = NULL;
//if (gwi)
// ReturnedColumn* rc = NULL;
// if (gwi)
// rc = buildAggFrmTempField(ref, *gwi);
//if (!rc)
// if (!rc)
//{
Item_field* ifp = static_cast<Item_field*>(*(ref->ref));
field_vec.push_back(ifp);
Item_field* ifp = static_cast<Item_field*>(*(ref->ref));
field_vec.push_back(ifp);
//}
break;
}
@ -7512,7 +7515,7 @@ int processWhere(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, const s
if (!gwi.rcWorkStack.empty())
{
while(!gwi.rcWorkStack.empty())
while (!gwi.rcWorkStack.empty())
{
ReturnedColumn* t = gwi.rcWorkStack.top();
delete t;
@ -7521,7 +7524,7 @@ int processWhere(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, const s
}
if (!gwi.ptWorkStack.empty())
{
while(!gwi.ptWorkStack.empty())
while (!gwi.ptWorkStack.empty())
{
ParseTree* t = gwi.ptWorkStack.top();
delete t;
@ -7529,7 +7532,6 @@ int processWhere(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, const s
}
}
return 0;
}
@ -7818,7 +7820,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
}
gwi.clauseType = SELECT;
SELECT_LEX* oldSelectLex = gwi.select_lex; // XXX: SZ: should it be restored in case of error return?
SELECT_LEX* oldSelectLex = gwi.select_lex; // XXX: SZ: should it be restored in case of error return?
gwi.select_lex = &select_lex;
#ifdef DEBUG_WALK_COND
{
@ -7931,7 +7933,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
ReturnedColumn* rc = wrapIntoAggregate(sc, gwi, baseItem);
SRCP sprc(rc);
pushReturnedCol(gwi, baseItem, sprc);
pushReturnedCol(gwi, baseItem, sprc);
gwi.columnMap.insert(
CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), sprc));
@ -7968,7 +7970,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
// add this agg col to returnedColumnList
boost::shared_ptr<ReturnedColumn> spac(ac);
pushReturnedCol(gwi, item, spac);
pushReturnedCol(gwi, item, spac);
break;
}
@ -8027,7 +8029,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
if (!hasNonSupportItem && ifp->const_item() && !(parseInfo & AF_BIT) && tmpVec.size() == 0)
{
srcp.reset(buildReturnedColumn(item, gwi, gwi.fatalParseError));
pushReturnedCol(gwi, item, srcp);
pushReturnedCol(gwi, item, srcp);
if (ifp->name.length)
srcp->alias(ifp->name.str);
@ -8035,7 +8037,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
continue;
}
pushReturnedCol(gwi, item, srcp);
pushReturnedCol(gwi, item, srcp);
}
else // This was a vtable post-process block
{
@ -8057,7 +8059,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
if (ifp->name.length)
cc->alias(ifp->name.str);
pushReturnedCol(gwi, ifp, srcp);
pushReturnedCol(gwi, ifp, srcp);
// clear the error set by buildFunctionColumn
gwi.fatalParseError = false;
@ -8135,7 +8137,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
if (item->name.length)
srcp->alias(item->name.str);
pushReturnedCol(gwi, item, srcp);
pushReturnedCol(gwi, item, srcp);
}
break;
@ -8159,7 +8161,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
else
{
SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError));
pushReturnedCol(gwi, item, srcp);
pushReturnedCol(gwi, item, srcp);
if (item->name.length)
srcp->alias(item->name.str);
@ -8255,7 +8257,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
return ER_CHECK_NOT_IMPLEMENTED;
}
pushReturnedCol(gwi, item, srcp);
pushReturnedCol(gwi, item, srcp);
break;
}
case Item::TYPE_HOLDER:
@ -9105,7 +9107,6 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
int cp_get_table_plan(THD* thd, SCSEP& csep, cal_table_info& ti, long timeZone)
{
SubQueryChainHolder chainHolder;
bool allocated = false;
gp_walk_info* gwi;

View File

@ -0,0 +1,44 @@
DROP DATABASE IF EXISTS mcol_5852;
CREATE DATABASE mcol_5852;
USE mcol_5852;
CREATE TABLE gc (
id INTEGER NOT NULL,
longtxt TEXT NOT NULL
) ENGINE=ColumnStore;
SET max_recursive_iterations=100000;
INSERT INTO gc (
WITH RECURSIVE series AS (
SELECT 1 AS id, REPEAT('=', 1024) AS longtxt
UNION ALL
SELECT id + 1 AS id, longtxt FROM series WHERE id < 50000
) SELECT id, longtxt FROM series);
SET columnstore_um_mem_limit=64;
SELECT id, GROUP_CONCAT(longtxt) FROM gc GROUP BY 1 ORDER BY 1 LIMIT 10;
ERROR HY000: Internal error: TupleAggregateStep::threadedAggregateRowGroups() MCS-2003: Aggregation/Distinct memory limit is exceeded.
SET columnstore_um_mem_limit=512;
SELECT id, GROUP_CONCAT(longtxt) FROM gc GROUP BY 1 ORDER BY 1 LIMIT 10;
id GROUP_CONCAT(longtxt)
1 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
2 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
3 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
4 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
5 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
6 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
7 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
8 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
9 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
10 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
SET columnstore_um_mem_limit=64;
SELECT id, GROUP_CONCAT(longtxt) FROM gc GROUP BY 1 ORDER BY 1 LIMIT 10;
id GROUP_CONCAT(longtxt)
1 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
2 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
3 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
4 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
5 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
6 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
7 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
8 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
9 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
10 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
DROP DATABASE mcol_5852;

View File

@ -0,0 +1,36 @@
# MCOL-5852: run the same GROUP_CONCAT query under different
# columnstore_um_mem_limit values, with disk-based aggregation switched
# off and then on via mcsSetConfig.
--source ../include/have_columnstore.inc
--disable_warnings
DROP DATABASE IF EXISTS mcol_5852;
--enable_warnings
CREATE DATABASE mcol_5852;
USE mcol_5852;
CREATE TABLE gc (
id INTEGER NOT NULL,
longtxt TEXT NOT NULL
) ENGINE=ColumnStore;
# The recursive CTE below runs up to id = 50000, so set the recursion
# limit high enough to cover it.
SET max_recursive_iterations=100000;
# Fill gc with ids 1..50000, each row carrying a 1024-character string of '='.
INSERT INTO gc (
WITH RECURSIVE series AS (
SELECT 1 AS id, REPEAT('=', 1024) AS longtxt
UNION ALL
SELECT id + 1 AS id, longtxt FROM series WHERE id < 50000
) SELECT id, longtxt FROM series);
# Case 1: limit 64 with disk-based aggregation disabled. The query is
# expected to fail with error 1815.
SET columnstore_um_mem_limit=64;
--exec /usr/bin/mcsSetConfig RowAggregation AllowDiskBasedAggregation N
--error 1815
SELECT id, GROUP_CONCAT(longtxt) FROM gc GROUP BY 1 ORDER BY 1 LIMIT 10;
# Case 2: disk-based aggregation still disabled, but with the limit raised
# to 512 the same query is expected to succeed.
SET columnstore_um_mem_limit=512;
SELECT id, GROUP_CONCAT(longtxt) FROM gc GROUP BY 1 ORDER BY 1 LIMIT 10;
# Case 3: back to limit 64, this time with disk-based aggregation enabled;
# the query is expected to succeed.
# NOTE(review): AllowDiskBasedAggregation is left at Y afterwards and the
# value it had before the test is not restored -- confirm this is intended.
SET columnstore_um_mem_limit=64;
--exec /usr/bin/mcsSetConfig RowAggregation AllowDiskBasedAggregation Y
SELECT id, GROUP_CONCAT(longtxt) FROM gc GROUP BY 1 ORDER BY 1 LIMIT 10;
# cleanup
DROP DATABASE mcol_5852;

View File

@ -431,7 +431,6 @@ ByteStream& ByteStream::operator>>(utils::NullString& s)
return *this;
}
ByteStream& ByteStream::operator>>(uint8_t*& bpr)
{
peek(bpr);

View File

@ -35,7 +35,6 @@
#include "mcs_basic_types.h"
#include "resourcemanager.h"
#include "groupconcat.h"
#include "jsonarrayagg.h"
#include "blocksize.h"
#include "errorcodes.h"
@ -537,6 +536,7 @@ RowAggregation::RowAggregation(const RowAggregation& rhs)
, fRm(rhs.fRm)
, fSessionMemLimit(rhs.fSessionMemLimit)
, fRollupFlag(rhs.fRollupFlag)
, fGroupConcat(rhs.fGroupConcat)
{
fGroupByCols.assign(rhs.fGroupByCols.begin(), rhs.fGroupByCols.end());
fFunctionCols.assign(rhs.fFunctionCols.begin(), rhs.fFunctionCols.end());
@ -661,14 +661,17 @@ void RowAggregation::resetUDAF(RowUDAFFunctionCol* rowUDAF, uint64_t funcColsIdx
//------------------------------------------------------------------------------
void RowAggregation::initialize(bool hasGroupConcat)
{
if (hasGroupConcat)
{
fRowGroupOut->setUseAggregateDataStore(true, fGroupConcat);
}
// Calculate the length of the hashmap key.
fAggMapKeyCount = fGroupByCols.size();
bool disk_agg = fRm ? fRm->getAllowDiskAggregation() : false;
bool allow_gen = true;
for (auto& fun : fFunctionCols)
{
if (fun->fAggFunction == ROWAGG_UDAF || fun->fAggFunction == ROWAGG_GROUP_CONCAT ||
fun->fAggFunction == ROWAGG_JSON_ARRAY)
if (fun->fAggFunction == ROWAGG_UDAF)
{
allow_gen = false;
break;
@ -757,8 +760,7 @@ void RowAggregation::aggReset()
bool allow_gen = true;
for (auto& fun : fFunctionCols)
{
if (fun->fAggFunction == ROWAGG_UDAF || fun->fAggFunction == ROWAGG_GROUP_CONCAT ||
fun->fAggFunction == ROWAGG_JSON_ARRAY)
if (fun->fAggFunction == ROWAGG_UDAF)
{
allow_gen = false;
break;
@ -1884,9 +1886,9 @@ void RowAggregation::mergeEntries(const Row& rowIn)
case ROWAGG_DUP_AVG:
case ROWAGG_DUP_STATS:
case ROWAGG_DUP_UDAF:
case ROWAGG_CONSTANT:
case ROWAGG_CONSTANT: break;
case ROWAGG_JSON_ARRAY:
case ROWAGG_GROUP_CONCAT: break;
case ROWAGG_GROUP_CONCAT: mergeGroupConcat(rowIn, colOut); break;
case ROWAGG_UDAF: doUDAF(rowIn, colOut, colOut, colOut + 1, i); break;
@ -2138,6 +2140,12 @@ void RowAggregation::mergeStatistics(const Row& rowIn, uint64_t colOut, uint64_t
colAux + 1);
}
// Merge step for ROWAGG_GROUP_CONCAT / ROWAGG_JSON_ARRAY columns: fetches the
// aggregate-data object held by the current output row (fRow) at colOut and
// asks it to merge in the data from rowIn at the same column index.
void RowAggregation::mergeGroupConcat(const Row& rowIn, uint64_t colOut)
{
  fRow.getAggregateData(colOut)->merge(rowIn, colOut);
}
void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux,
uint64_t& funcColsIdx, std::vector<mcsv1sdk::mcsv1Context>* rgContextColl)
{
@ -2540,7 +2548,6 @@ RowAggregationUM::RowAggregationUM(const RowAggregationUM& rhs)
, fExpression(rhs.fExpression)
, fTotalMemUsage(rhs.fTotalMemUsage)
, fConstantAggregate(rhs.fConstantAggregate)
, fGroupConcat(rhs.fGroupConcat)
, fLastMemUsage(rhs.fLastMemUsage)
{
}
@ -2626,28 +2633,19 @@ void RowAggregationUM::attachGroupConcatAg()
{
if (fGroupConcat.size() > 0)
{
uint8_t* data = fRow.getData();
uint64_t i = 0, j = 0;
uint64_t gc_idx = 0;
for (; i < fFunctionColGc.size(); i++)
for (uint64_t i = 0; i < fFunctionColGc.size(); i++)
{
if (fFunctionColGc[i]->fAggFunction != ROWAGG_GROUP_CONCAT &&
fFunctionColGc[i]->fAggFunction != ROWAGG_JSON_ARRAY)
{
continue;
}
int64_t colOut = fFunctionColGc[i]->fOutputColumnIndex;
if (fFunctionColGc[i]->fAggFunction == ROWAGG_GROUP_CONCAT)
{
// save the object's address in the result row
SP_GroupConcatAg gcc(new joblist::GroupConcatAgUM(fGroupConcat[j++]));
fGroupConcatAg.push_back(gcc);
*((GroupConcatAg**)(data + fRow.getOffset(colOut))) = gcc.get();
}
if (fFunctionColGc[i]->fAggFunction == ROWAGG_JSON_ARRAY)
{
// save the object's address in the result row
SP_GroupConcatAg gcc(new joblist::JsonArrayAggregatAgUM(fGroupConcat[j++]));
fGroupConcatAg.push_back(gcc);
*((GroupConcatAg**)(data + fRow.getOffset(colOut))) = gcc.get();
}
joblist::SP_GroupConcatAg gcc(new joblist::GroupConcatAg(
fGroupConcat[gc_idx++], fFunctionColGc[i]->fAggFunction == ROWAGG_JSON_ARRAY));
fRow.setAggregateData(gcc, colOut);
}
}
}
@ -2706,14 +2704,9 @@ void RowAggregationUM::updateEntry(const Row& rowIn, std::vector<mcsv1sdk::mcsv1
}
case ROWAGG_GROUP_CONCAT:
{
doGroupConcat(rowIn, colIn, colOut);
break;
}
case ROWAGG_JSON_ARRAY:
{
doJsonAgg(rowIn, colIn, colOut);
doGroupConcat(rowIn, colIn, colOut);
break;
}
@ -2756,15 +2749,7 @@ void RowAggregationUM::updateEntry(const Row& rowIn, std::vector<mcsv1sdk::mcsv1
//------------------------------------------------------------------------------
void RowAggregationUM::doGroupConcat(const Row& rowIn, int64_t, int64_t o)
{
uint8_t* data = fRow.getData();
joblist::GroupConcatAgUM* gccAg = *((joblist::GroupConcatAgUM**)(data + fRow.getOffset(o)));
gccAg->processRow(rowIn);
}
void RowAggregationUM::doJsonAgg(const Row& rowIn, int64_t, int64_t o)
{
uint8_t* data = fRow.getData();
joblist::JsonArrayAggregatAgUM* gccAg = *((joblist::JsonArrayAggregatAgUM**)(data + fRow.getOffset(o)));
auto* gccAg = fRow.getAggregateData(o);
gccAg->processRow(rowIn);
}
@ -4158,30 +4143,17 @@ void RowAggregationUM::setGroupConcatString()
for (uint64_t i = 0; i < fRowGroupOut->getRowCount(); i++, fRow.nextRow())
{
for (uint64_t j = 0; j < fFunctionCols.size(); j++)
for (const auto& fcall : fFunctionCols)
{
uint8_t* data = fRow.getData();
if (fFunctionCols[j]->fAggFunction == ROWAGG_GROUP_CONCAT)
if (fcall->fAggFunction != ROWAGG_GROUP_CONCAT && fcall->fAggFunction != ROWAGG_JSON_ARRAY)
{
uint8_t* buff = data + fRow.getOffset(fFunctionCols[j]->fOutputColumnIndex);
uint8_t* gcString;
joblist::GroupConcatAgUM* gccAg = *((joblist::GroupConcatAgUM**)buff);
gcString = gccAg->getResult();
utils::ConstString str((char*)gcString, gcString ? strlen((const char*)gcString) : 0);
fRow.setStringField(str, fFunctionCols[j]->fOutputColumnIndex);
// gccAg->getResult(buff);
continue;
}
if (fFunctionCols[j]->fAggFunction == ROWAGG_JSON_ARRAY)
{
uint8_t* buff = data + fRow.getOffset(fFunctionCols[j]->fOutputColumnIndex);
uint8_t* gcString;
joblist::JsonArrayAggregatAgUM* gccAg = *((joblist::JsonArrayAggregatAgUM**)buff);
gcString = gccAg->getResult();
utils::ConstString str((char*)gcString, gcString ? strlen((char*)gcString) : 0);
fRow.setStringField(str, fFunctionCols[j]->fOutputColumnIndex);
}
auto* gccAg = fRow.getAggregateData(fcall->fOutputColumnIndex);
uint8_t* gcString = gccAg->getResult();
utils::ConstString str((char*)gcString, gcString ? strlen((const char*)gcString) : 0);
fRow.setStringField(str, fcall->fOutputColumnIndex);
}
}
}
@ -4306,14 +4278,9 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn, std::vector<mcsv1sdk::mcs
}
case ROWAGG_GROUP_CONCAT:
{
doGroupConcat(rowIn, colIn, colOut);
break;
}
case ROWAGG_JSON_ARRAY:
{
doJsonAgg(rowIn, colIn, colOut);
doGroupConcat(rowIn, colIn, colOut);
break;
}
@ -4537,15 +4504,7 @@ void RowAggregationUMP2::doStatistics(const Row& rowIn, int64_t colIn, int64_t c
//------------------------------------------------------------------------------
void RowAggregationUMP2::doGroupConcat(const Row& rowIn, int64_t i, int64_t o)
{
uint8_t* data = fRow.getData();
joblist::GroupConcatAgUM* gccAg = *((joblist::GroupConcatAgUM**)(data + fRow.getOffset(o)));
gccAg->merge(rowIn, i);
}
void RowAggregationUMP2::doJsonAgg(const Row& rowIn, int64_t i, int64_t o)
{
uint8_t* data = fRow.getData();
joblist::JsonArrayAggregatAgUM* gccAg = *((joblist::JsonArrayAggregatAgUM**)(data + fRow.getOffset(o)));
auto* gccAg = fRow.getAggregateData(o);
gccAg->merge(rowIn, i);
}
@ -4803,14 +4762,9 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn, std::vector<mcsv1sdk:
}
case ROWAGG_GROUP_CONCAT:
{
doGroupConcat(rowIn, colIn, colOut);
break;
}
case ROWAGG_JSON_ARRAY:
{
doJsonAgg(rowIn, colIn, colOut);
doGroupConcat(rowIn, colIn, colOut);
break;
}
@ -4943,17 +4897,10 @@ void RowAggregationSubDistinct::addRowGroup(const RowGroup* pRows,
//------------------------------------------------------------------------------
void RowAggregationSubDistinct::doGroupConcat(const Row& rowIn, int64_t i, int64_t o)
{
uint8_t* data = fRow.getData();
joblist::GroupConcatAgUM* gccAg = *((joblist::GroupConcatAgUM**)(data + fRow.getOffset(o)));
auto* gccAg = fRow.getAggregateData(o);
gccAg->merge(rowIn, i);
}
void RowAggregationSubDistinct::doJsonAgg(const Row& rowIn, int64_t i, int64_t o)
{
uint8_t* data = fRow.getData();
joblist::JsonArrayAggregatAgUM* gccAg = *((joblist::JsonArrayAggregatAgUM**)(data + fRow.getOffset(o)));
gccAg->merge(rowIn, i);
}
//------------------------------------------------------------------------------
// Constructor / destructor
//------------------------------------------------------------------------------
@ -5129,12 +5076,122 @@ void RowAggregationMultiDistinct::doDistinctAggregation_rowVec(
fOrigFunctionCols = nullptr;
}
GroupConcatAg::GroupConcatAg(SP_GroupConcat& gcc) : fGroupConcat(gcc)
// Writes this GroupConcat descriptor to bs.
//
// Field order on the wire, each list prefixed by a uint64_t element count:
//   fGroupCols, fOrderCols, fSeparator, fConstCols, fDistinct (as uint8_t),
//   fSize, fRowGroup, the raw fMapping array (byte length first),
//   fOrderCond, fTimeZone, id.
// fRm and fSessionMemLimit are not written by this function.
// Any reader must consume the fields in exactly this order.
void GroupConcat::serialize(messageqcpp::ByteStream& bs) const
{
uint64_t size;
// Group columns: both pair members are streamed as-is.
size = fGroupCols.size();
bs << size;
for (const auto& [k, v] : fGroupCols)
{
bs << k;
bs << v;
}
// Order columns: the second pair member is narrowed to one byte.
size = fOrderCols.size();
bs << size;
for (const auto& [k, v] : fOrderCols)
{
bs << k;
bs << static_cast<uint8_t>(v);
}
bs << fSeparator;
// Constant columns: both pair members are streamed as-is.
size = fConstCols.size();
bs << size;
for (const auto& [k, v] : fConstCols)
{
bs << k;
bs << v;
}
bs << static_cast<uint8_t>(fDistinct);
bs << fSize;
fRowGroup.serialize(bs);
// The mapping is written as a raw byte blob of getColumnCount() ints,
// preceded by its length in bytes.
// NOTE(review): assumes fMapping holds at least getColumnCount() ints - confirm.
size = fRowGroup.getColumnCount() * sizeof(int);
bs << size;
bs.append(reinterpret_cast<uint8_t*>(fMapping.get()), size);
// Order conditions: the second pair member is narrowed to one byte.
size = fOrderCond.size();
bs << size;
for (const auto& [k, v] : fOrderCond)
{
bs << k;
bs << static_cast<uint8_t>(v);
}
bs << fTimeZone;
bs << id;
}
GroupConcatAg::~GroupConcatAg()
/** @brief Restore this GroupConcat descriptor from a ByteStream.
 *
 * Reads the fields in the same order GroupConcat::serialize() writes them:
 * fGroupCols, fOrderCols, fSeparator, fConstCols, fDistinct, fSize, fRowGroup,
 * the raw fMapping array, fOrderCond, fTimeZone and id. Any previous contents
 * of the four vectors are discarded first. fRm and fSessionMemLimit are not
 * part of the stream and are left untouched.
 *
 * @param bs stream positioned at the start of a serialized GroupConcat.
 */
void GroupConcat::deserialize(messageqcpp::ByteStream& bs)
{
  fGroupCols.clear();
  fOrderCols.clear();
  fConstCols.clear();
  fOrderCond.clear();

  RGDataSizeType size;

  // Columns to concatenate.
  bs >> size;
  fGroupCols.reserve(size);
  for (RGDataSizeType i = 0; i < size; ++i)
  {
    uint32_t f, s;
    bs >> f;
    bs >> s;
    fGroupCols.emplace_back(f, s);
  }

  // ORDER BY columns. The flag travels as one byte.
  bs >> size;
  fOrderCols.reserve(size);
  for (RGDataSizeType i = 0; i < size; ++i)
  {
    uint32_t f;
    bs >> f;
    uint8_t s;
    bs >> s;
    // Fixed: entries used to be appended to fOrderCond, which left fOrderCols
    // empty and polluted fOrderCond with order-column entries.
    fOrderCols.emplace_back(f, static_cast<bool>(s));
  }

  bs >> fSeparator;

  // Constant columns.
  bs >> size;
  fConstCols.reserve(size);
  for (RGDataSizeType i = 0; i < size; ++i)
  {
    utils::NullString f;
    bs >> f;
    uint32_t s;
    bs >> s;
    fConstCols.emplace_back(f, s);
  }

  uint8_t tmp8;
  bs >> tmp8;
  fDistinct = tmp8;
  bs >> fSize;
  fRowGroup.deserialize(bs);

  // Raw mapping array: byte length followed by that many bytes of ints.
  bs >> size;
  idbassert(size % sizeof(int) == 0);
  // Do not read past the unconsumed part of the stream.
  idbassert(size <= bs.length());
  // Element count derived with sizeof(int) to match the check above and the
  // serializer, instead of a hard-coded 4.
  fMapping.reset(new int[size / sizeof(int)]);
  memcpy(fMapping.get(), bs.buf(), size);
  bs.advance(size);

  // Order conditions. The flag travels as one byte.
  bs >> size;
  fOrderCond.reserve(size);
  for (RGDataSizeType i = 0; i < size; ++i)
  {
    int f;
    bs >> f;
    uint8_t s;
    bs >> s;
    fOrderCond.emplace_back(f, static_cast<bool>(s));
  }

  bs >> fTimeZone;
  bs >> id;
}
/** @brief Estimate the memory held by this GroupConcat descriptor.
 *
 * Sums the reserved capacity of the column vectors and the separator, the
 * empty size of fRowGroup and the size of the mapping array (one int per
 * column of fRowGroup). Vector entries are accounted at 8 bytes each, except
 * fConstCols which uses 4 + sizeof(utils::NullString).
 *
 * @return the estimated size in bytes.
 */
RGDataSizeType GroupConcat::getDataSize() const
{
  RGDataSizeType size = 0;
  size += fGroupCols.capacity() * 8;
  size += fOrderCols.capacity() * 8;
  size += fSeparator.capacity();
  size += fConstCols.capacity() * (4 + sizeof(utils::NullString));
  size += fRowGroup.getEmptySize();
  size += fRowGroup.getColumnCount() * sizeof(int);
  // Fixed: fOrderCols used to be counted a second time here while fOrderCond
  // was never accounted for.
  size += fOrderCond.capacity() * 8;
  return size;
}
} // namespace rowgroup

View File

@ -327,7 +327,7 @@ struct ConstantAggData
typedef boost::shared_ptr<RowAggGroupByCol> SP_ROWAGG_GRPBY_t;
typedef boost::shared_ptr<RowAggFunctionCol> SP_ROWAGG_FUNC_t;
struct GroupConcat
struct GroupConcat : public messageqcpp::Serializeable
{
// GROUP_CONCAT(DISTINCT col1, 'const', col2 ORDER BY col3 desc SEPARATOR 'sep')
std::vector<std::pair<uint32_t, uint32_t>> fGroupCols; // columns to concatenate, and position
@ -340,38 +340,26 @@ struct GroupConcat
RowGroup fRowGroup;
std::shared_ptr<int[]> fMapping;
std::vector<std::pair<int, bool>> fOrderCond; // position to order by [asc/desc]
joblist::ResourceManager* fRm; // resource manager
boost::shared_ptr<int64_t> fSessionMemLimit;
long fTimeZone;
uint32_t id;
GroupConcat() : fRm(nullptr)
GroupConcat() = default;
GroupConcat(joblist::ResourceManager* rm, boost::shared_ptr<int64_t> sessLimit)
: fRm(rm)
, fSessionMemLimit(sessLimit)
{
}
void serialize(messageqcpp::ByteStream& bs) const override;
void deserialize(messageqcpp::ByteStream& bs) override;
RGDataSizeType getDataSize() const;
joblist::ResourceManager* fRm{nullptr};
boost::shared_ptr<int64_t> fSessionMemLimit;
};
typedef boost::shared_ptr<GroupConcat> SP_GroupConcat;
// Base class for a GROUP_CONCAT aggregator. It keeps the shared GroupConcat
// descriptor; every virtual operation declared here has an empty default
// body and getResult() returns nullptr.
class GroupConcatAg
{
public:
// Constructed from the shared descriptor (defined out of line).
explicit GroupConcatAg(SP_GroupConcat&);
virtual ~GroupConcatAg();
// Default implementation does nothing.
virtual void initialize() {};
// Default implementation ignores the row.
virtual void processRow(const rowgroup::Row&) {};
// Default implementation ignores both arguments.
virtual void merge(const rowgroup::Row&, uint64_t) {};
// Default implementation has no result to return.
virtual uint8_t* getResult()
{
return nullptr;
}
protected:
// Shared descriptor this aggregator was created with.
rowgroup::SP_GroupConcat fGroupConcat;
};
typedef boost::shared_ptr<GroupConcatAg> SP_GroupConcatAg;
//------------------------------------------------------------------------------
/** @brief Class that aggregates RowGroups.
*/
@ -555,6 +543,8 @@ class RowAggregation : public messageqcpp::Serializeable
virtual void doAvg(const Row&, int64_t, int64_t, int64_t, bool merge = false);
virtual void doStatistics(const Row&, int64_t, int64_t, int64_t);
void mergeStatistics(const Row&, uint64_t colOut, uint64_t colAux);
void mergeGroupConcat(const Row& rowIn, uint64_t colOut);
virtual void doBitOp(const Row&, int64_t, int64_t, int);
virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx,
std::vector<mcsv1sdk::mcsv1Context>* rgContextColl = nullptr);
@ -650,6 +640,8 @@ class RowAggregation : public messageqcpp::Serializeable
std::string fTmpDir =
config::Config::makeConfig()->getTempFileDir(config::Config::TempDirPurpose::Aggregates);
std::string fCompStr = config::Config::makeConfig()->getConfig("RowAggregation", "Compression");
std::vector<SP_GroupConcat> fGroupConcat;
};
//------------------------------------------------------------------------------
@ -794,7 +786,6 @@ class RowAggregationUM : public RowAggregation
// @bug3362, group_concat
virtual void doGroupConcat(const Row&, int64_t, int64_t);
virtual void doJsonAgg(const Row&, int64_t, int64_t);
virtual void setGroupConcatString();
bool fHasAvg;
@ -814,8 +805,6 @@ class RowAggregationUM : public RowAggregation
std::vector<ConstantAggData> fConstantAggregate;
// @bug3362, group_concat
std::vector<SP_GroupConcat> fGroupConcat;
std::vector<SP_GroupConcatAg> fGroupConcatAg;
std::vector<SP_ROWAGG_FUNC_t> fFunctionColGc;
private:
@ -856,7 +845,6 @@ class RowAggregationUMP2 : public RowAggregationUM
void doAvg(const Row&, int64_t, int64_t, int64_t, bool merge = false) override;
void doStatistics(const Row&, int64_t, int64_t, int64_t) override;
void doGroupConcat(const Row&, int64_t, int64_t) override;
void doJsonAgg(const Row&, int64_t, int64_t) override;
void doBitOp(const Row&, int64_t, int64_t, int) override;
void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx,
std::vector<mcsv1sdk::mcsv1Context>* rgContextColl = nullptr) override;
@ -964,7 +952,6 @@ class RowAggregationSubDistinct : public RowAggregationUM
protected:
// virtual methods from RowAggregationUM
void doGroupConcat(const Row&, int64_t, int64_t) override;
void doJsonAgg(const Row&, int64_t, int64_t) override;
// for groupby columns and the aggregated distinct column
Row fDistRow;
boost::scoped_array<uint8_t> fDistRowData;

View File

@ -29,6 +29,8 @@
// #define NDEBUG
#include <sstream>
#include <iterator>
#include "rowaggregation.h"
using namespace std;
#include <numeric>
@ -43,6 +45,7 @@ using namespace execplan;
#include "rowgroup.h"
#include "dataconvert.h"
#include "columnwidth.h"
#include "groupconcat.h"
namespace rowgroup
{
@ -305,6 +308,72 @@ void UserDataStore::deserialize(ByteStream& bs)
return;
}
// Writes the store to bs: the number of GroupConcat descriptors followed by
// each descriptor, then the number of aggregate objects followed, for each
// one, by the value of getGroupConcatId() and the aggregate's own payload.
void AggregateDataStore::serialize(messageqcpp::ByteStream& bs) const
{
  uint64_t count = fGroupConcat.size();
  bs << count;
  for (const auto& descriptor : fGroupConcat)
    descriptor->serialize(bs);

  count = fData.size();
  bs << count;
  for (const auto& aggregate : fData)
  {
    // The id is written first so the reader can pick the descriptor before
    // reading the aggregate itself.
    bs << aggregate->getGroupConcatId();
    aggregate->serialize(bs);
  }
}
// Rebuilds the store from bs, replacing any current contents. Descriptors are
// read first; each aggregate is then created on the descriptor selected by
// the id read from the stream and asked to deserialize itself.
void AggregateDataStore::deserialize(messageqcpp::ByteStream& bs)
{
  fGroupConcat.clear();
  fData.clear();

  uint64_t count;
  bs >> count;
  fGroupConcat.resize(count);
  for (auto& descriptor : fGroupConcat)
  {
    descriptor.reset(new GroupConcat());
    descriptor->deserialize(bs);
  }

  bs >> count;
  fData.resize(count);
  for (auto& aggregate : fData)
  {
    uint32_t gc_id;
    bs >> gc_id;
    // The id is used as an index into the descriptors read above.
    idbassert(gc_id < fGroupConcat.size());
    aggregate.reset(new joblist::GroupConcatAg(fGroupConcat[gc_id]));
    aggregate->deserialize(bs);
  }
}
// Appends data to the store and returns the position it was stored at.
uint32_t AggregateDataStore::storeAggregateData(boost::shared_ptr<joblist::GroupConcatAg>& data)
{
  // The new element lands at the current size, i.e. size() - 1 after the append.
  const auto slot = fData.size();
  fData.emplace_back(data);
  return slot;
}
// Returns the aggregate stored at pos; pos must be a valid position.
boost::shared_ptr<joblist::GroupConcatAg> AggregateDataStore::getAggregateData(uint32_t pos) const
{
  idbassert(pos < fData.size());
  const auto& entry = fData[pos];
  return entry;
}
// Returns the sum of getDataSize() over every descriptor and every aggregate
// held by the store.
RGDataSizeType AggregateDataStore::getDataSize() const
{
  const auto addSize = [](RGDataSizeType acc, const auto& item) -> RGDataSizeType
  { return acc + item->getDataSize(); };

  const RGDataSizeType descriptors =
      std::accumulate(fGroupConcat.begin(), fGroupConcat.end(), RGDataSizeType{0}, addSize);
  return std::accumulate(fData.begin(), fData.end(), descriptors, addSize);
}
RGData::RGData(allocators::CountingAllocator<RGDataBufType>& _alloc) : RGData()
{
@ -316,29 +385,24 @@ RGData::RGData(const RowGroup& rg, uint32_t rowCount)
RGDataSizeType s = rg.getDataSize(rowCount);
rowData.reset(new uint8_t[s]);
if (rg.usesStringTable() && rowCount > 0) {
if (rg.usesStringTable() && rowCount > 0)
{
strings.reset(new StringStore());
strings->useOnlyLongStrings(rg.usesOnlyLongString());
}
if (rg.usesAggregateDataStore())
{
aggregateDataStore.reset(new AggregateDataStore(rg.getGroupConcats()));
}
userDataStore.reset();
columnCount = rg.getColumnCount();
rowSize = rg.getRowSize();
}
RGData::RGData(const RowGroup& rg)
RGData::RGData(const RowGroup& rg) : RGData(rg, rgCommonSize)
{
rowData.reset(new uint8_t[rg.getMaxDataSize()]);
if (rg.usesStringTable())
{
strings.reset(new StringStore());
strings->useOnlyLongStrings(rg.usesOnlyLongString());
}
userDataStore.reset();
columnCount = rg.getColumnCount();
rowSize = rg.getRowSize();
}
@ -371,21 +435,28 @@ void RGData::reinit(const RowGroup& rg, uint32_t rowCount)
userDataStore.reset();
if (rg.usesStringTable())
if (rg.usesStringTable() || rg.usesOnlyLongString())
{
if (alloc)
{
allocators::CountingAllocator<StringStoreBufType> ssAlloc = alloc.value();
strings.reset(new StringStore(ssAlloc));
strings->useOnlyLongStrings(rg.usesOnlyLongString());
}
else
{
strings.reset(new StringStore());
}
}
else
strings.reset();
if (rg.usesAggregateDataStore())
{
aggregateDataStore.reset(new AggregateDataStore(rg.getGroupConcats()));
}
else
aggregateDataStore.reset();
columnCount = rg.getColumnCount();
rowSize = rg.getRowSize();
}
@ -419,6 +490,14 @@ void RGData::serialize(ByteStream& bs, RGDataSizeType amount) const
}
else
bs << (uint8_t)0;
if (aggregateDataStore)
{
bs << (uint8_t)1;
aggregateDataStore->serialize(bs);
}
else
bs << (uint8_t)0;
}
void RGData::deserialize(ByteStream& bs, RGDataSizeType defAmount)
@ -473,6 +552,15 @@ void RGData::deserialize(ByteStream& bs, RGDataSizeType defAmount)
}
else
userDataStore.reset();
bs >> tmp8;
if (tmp8)
{
aggregateDataStore.reset(new AggregateDataStore());
aggregateDataStore->deserialize(bs);
}
else
aggregateDataStore.reset();
}
return;
@ -1133,8 +1221,10 @@ RowGroup::RowGroup(const RowGroup& r)
, precision(r.precision)
, rgData(r.rgData)
, strings(r.strings)
, aggregateDataStore(r.aggregateDataStore)
, useStringTable(r.useStringTable)
, useOnlyLongStrings(r.useOnlyLongStrings)
, useAggregateDataStore(r.useAggregateDataStore)
, hasCollation(r.hasCollation)
, hasLongStringField(r.hasLongStringField)
, sTableThreshold(r.sTableThreshold)
@ -1166,8 +1256,10 @@ RowGroup& RowGroup::operator=(const RowGroup& r)
precision = r.precision;
rgData = r.rgData;
strings = r.strings;
aggregateDataStore = r.aggregateDataStore;
useStringTable = r.useStringTable;
useOnlyLongStrings = r.useOnlyLongStrings;
useAggregateDataStore = r.useAggregateDataStore;
hasCollation = r.hasCollation;
hasLongStringField = r.hasLongStringField;
sTableThreshold = r.sTableThreshold;
@ -1261,6 +1353,25 @@ void RowGroup::deserialize(ByteStream& bs)
charsets.insert(charsets.begin(), charsetNumbers.size(), nullptr);
}
// Switches the aggregate data store on or off for this RowGroup.
//  b              new state of the flag
//  group_concats  descriptors to use; must not be empty when b is true
// When enabling, the descriptors are copied and, if an RGData is attached,
// a fresh AggregateDataStore is installed on it. When disabling a previously
// enabled store, only the copied descriptors are dropped.
void RowGroup::setUseAggregateDataStore(bool b, std::span<boost::shared_ptr<GroupConcat>> group_concats)
{
  idbassert(!b || !group_concats.empty());

  if (b)
  {
    fGroupConcats.assign(group_concats.begin(), group_concats.end());
    if (rgData != nullptr)
    {
      rgData->aggregateDataStore.reset(new AggregateDataStore(fGroupConcats));
      aggregateDataStore = rgData->aggregateDataStore.get();
    }
  }
  else if (useAggregateDataStore)
  {
    fGroupConcats.clear();
  }

  useAggregateDataStore = b;
}
void RowGroup::serializeRGData(ByteStream& bs) const
{
rgData->serialize(bs, getDataSize());

View File

@ -27,6 +27,7 @@
#pragma once
#include <span>
#include <vector>
#include <string>
#include <stdexcept>
@ -60,6 +61,10 @@
#include "execinfo.h"
// Workaround for my_global.h #define of isnan(X) causing a std::std namespace
namespace joblist
{
class GroupConcatAg;
}
namespace rowgroup
{
@ -172,8 +177,14 @@ class StringStore
{
return fUseStoreStringMutex;
}
void useOnlyLongStrings(bool b) { fUseOnlyLongStrings = b; }
bool useOnlyLongStrings() const { return fUseOnlyLongStrings; }
void useOnlyLongStrings(bool b)
{
fUseOnlyLongStrings = b;
}
bool useOnlyLongStrings() const
{
return fUseOnlyLongStrings;
}
// This is an overlay b/c the underlying data needs to be any size,
// and alloc'd in one chunk. data can't be a separate dynamic chunk.
@ -256,6 +267,36 @@ class UserDataStore
boost::mutex fMutex;
};
struct GroupConcat;
// Holds the GroupConcat descriptors and the joblist::GroupConcatAg objects
// that rows refer to by position. Non-copyable and non-movable.
class AggregateDataStore
{
public:
AggregateDataStore() = default;
// Creates an empty store that keeps a copy of the descriptor list.
explicit AggregateDataStore(const std::vector<boost::shared_ptr<GroupConcat>>& groupConcat)
: fGroupConcat(groupConcat)
{
}
~AggregateDataStore() = default;
AggregateDataStore(const AggregateDataStore&) = delete;
AggregateDataStore(AggregateDataStore&&) = delete;
AggregateDataStore& operator=(const AggregateDataStore&) = delete;
AggregateDataStore& operator=(AggregateDataStore&&) = delete;
// Write / read the descriptors and the stored aggregates.
void serialize(messageqcpp::ByteStream&) const;
void deserialize(messageqcpp::ByteStream&);
// Appends data and returns the position it was stored at.
uint32_t storeAggregateData(boost::shared_ptr<joblist::GroupConcatAg>& data);
// Returns the aggregate stored at pos.
boost::shared_ptr<joblist::GroupConcatAg> getAggregateData(uint32_t pos) const;
// Sum of getDataSize() over descriptors and stored aggregates.
RGDataSizeType getDataSize() const;
private:
friend class RGData;
// Descriptors shared by the stored aggregates.
std::vector<boost::shared_ptr<GroupConcat>> fGroupConcat;
// Stored aggregates, addressed by position.
std::vector<boost::shared_ptr<joblist::GroupConcatAg>> fData;
};
class RowGroup;
class Row;
@ -331,6 +372,7 @@ class RGData
boost::shared_ptr<RGDataBufType> rowData;
boost::shared_ptr<StringStore> strings;
std::shared_ptr<UserDataStore> userDataStore;
std::shared_ptr<AggregateDataStore> aggregateDataStore;
std::optional<allocators::CountingAllocator<RGDataBufType>> alloc = {};
// Need sig to support backward compat. RGData can deserialize both forms.
@ -356,9 +398,14 @@ class Row
inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u) : data(d), strings(s), userDataStore(u)
{
}
inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u, AggregateDataStore* a)
: data(d), strings(s), userDataStore(u), aggregateDataStore(a)
{
}
uint8_t* data = nullptr;
StringStore* strings = nullptr;
UserDataStore* userDataStore = nullptr;
AggregateDataStore* aggregateDataStore = nullptr;
};
Row() = default;
@ -526,6 +573,8 @@ class Row
inline boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t colIndex) const;
inline void setUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr<mcsv1sdk::UserData> userData,
uint32_t len, uint32_t colIndex);
inline void setAggregateData(boost::shared_ptr<joblist::GroupConcatAg> data, uint32_t colIndex);
inline joblist::GroupConcatAg* getAggregateData(uint32_t colIndex) const;
uint64_t getNullValue(uint32_t colIndex) const;
bool isNullValue(uint32_t colIndex) const;
@ -638,14 +687,15 @@ class Row
bool hasLongStringField = false;
uint32_t sTableThreshold = 20;
std::shared_ptr<bool[]> forceInline;
UserDataStore* userDataStore = nullptr; // For UDAF
UserDataStore* userDataStore = nullptr; // For UDAF
AggregateDataStore* aggregateDataStore = nullptr; // group_concat & json_arrayagg
friend class RowGroup;
};
inline Row::Pointer Row::getPointer() const
{
return Pointer(data, strings, userDataStore);
return Pointer(data, strings, userDataStore, aggregateDataStore);
}
inline uint8_t* Row::getData() const
{
@ -665,6 +715,7 @@ inline void Row::setPointer(const Pointer& p)
}
userDataStore = p.userDataStore;
aggregateDataStore = p.aggregateDataStore;
}
inline void Row::setData(const Pointer& p)
@ -1258,7 +1309,7 @@ inline void Row::setUintField(uint64_t val, uint32_t colIndex)
template <int len>
inline void Row::setIntField(int64_t val, uint32_t colIndex)
{
// idbassert(getColumnWidth(colIndex) == len);
// idbassert(getColumnWidth(colIndex) == len);
switch (len)
{
case 1: *((int8_t*)&data[offsets[colIndex]]) = val; break;
@ -1362,6 +1413,28 @@ inline void Row::setUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr<
*((uint32_t*)&data[offsets[colIndex] + 4]) = len;
}
/** @brief Attach an aggregate object to a column of this row.
 *
 * The object is appended to the row's AggregateDataStore and the position
 * returned by the store is written as a uint32_t into the column's slot.
 *
 * @param agData   aggregate object to store
 * @param colIndex column whose slot receives the position
 * @throws std::logic_error if the row has no AggregateDataStore
 */
inline void Row::setAggregateData(boost::shared_ptr<joblist::GroupConcatAg> agData, uint32_t colIndex)
{
  if (!aggregateDataStore)
  {
    // Fixed: the message used to name Row::getAggregateData (copy-paste).
    throw std::logic_error("Row::setAggregateData: no aggregateDataStore");
  }

  uint32_t pos = aggregateDataStore->storeAggregateData(agData);
  *((uint32_t*)&data[offsets[colIndex]]) = pos;
}
// Returns the aggregate object referenced by column colIndex of this row.
// The column slot holds a uint32_t position into the AggregateDataStore.
// Throws std::logic_error if the row has no AggregateDataStore.
inline joblist::GroupConcatAg* Row::getAggregateData(uint32_t colIndex) const
{
  if (aggregateDataStore == nullptr)
    throw std::logic_error("Row::getAggregateData: no aggregateDataStore");

  const uint32_t slot = *reinterpret_cast<const uint32_t*>(&data[offsets[colIndex]]);
  const auto holder = aggregateDataStore->getAggregateData(slot);
  return holder.get();
}
inline void Row::copyField(uint32_t destIndex, uint32_t srcIndex) const
{
uint32_t n = offsets[destIndex + 1] - offsets[destIndex];
@ -1559,8 +1632,19 @@ class RowGroup : public messageqcpp::Serializeable
inline bool usesStringTable() const;
inline void setUseStringTable(bool);
void setUseOnlyLongString(bool b) { useOnlyLongStrings = b; }
bool usesOnlyLongString() const { return useOnlyLongStrings ; }
void setUseOnlyLongString(bool b)
{
useOnlyLongStrings = b;
}
bool usesOnlyLongString() const
{
return useOnlyLongStrings;
}
void setUseAggregateDataStore(bool b, std::span<boost::shared_ptr<GroupConcat>> group_concats = {});
bool usesAggregateDataStore() const
{
return useAggregateDataStore;
}
bool hasLongString() const
{
@ -1606,6 +1690,11 @@ class RowGroup : public messageqcpp::Serializeable
const CHARSET_INFO* getCharset(uint32_t col);
const auto& getGroupConcats() const
{
return fGroupConcats;
}
private:
uint32_t columnCount = 0;
uint8_t* data = nullptr;
@ -1632,19 +1721,22 @@ class RowGroup : public messageqcpp::Serializeable
// string table impl
RGData* rgData = nullptr;
StringStore* strings = nullptr; // note, strings and data belong to rgData
AggregateDataStore* aggregateDataStore = nullptr;
bool useStringTable = true;
bool useOnlyLongStrings = false;
bool useAggregateDataStore = true;
bool useAggregateDataStore = false;
bool hasCollation = false;
bool hasLongStringField = false;
uint32_t sTableThreshold = 20;
std::shared_ptr<bool[]> forceInline;
static const uint64_t headerSize = 18;
static const uint64_t rowCountOffset = 0;
static const uint64_t baseRidOffset = 4;
static const uint64_t statusOffset = 12;
static const uint64_t dbRootOffset = 14;
std::vector<boost::shared_ptr<GroupConcat>> fGroupConcats;
static constexpr uint64_t headerSize = 18;
static constexpr uint64_t rowCountOffset = 0;
static constexpr uint64_t baseRidOffset = 4;
static constexpr uint64_t statusOffset = 12;
static constexpr uint64_t dbRootOffset = 14;
};
inline uint64_t convertToRid(const uint32_t& partNum, const uint16_t& segNum, const uint8_t& extentNum,
@ -1700,12 +1792,14 @@ inline void RowGroup::getRow(uint32_t rowNum, Row* r) const
r->data = &(data[headerSize + (rowNum * r->getSize())]);
r->strings = strings;
r->userDataStore = rgData->userDataStore.get();
r->aggregateDataStore = rgData->aggregateDataStore.get();
}
inline void RowGroup::setData(RGData* rgd)
{
data = rgd->rowData.get();
strings = rgd->strings.get();
aggregateDataStore = rgd->aggregateDataStore.get();
rgData = rgd;
}
@ -1792,10 +1886,16 @@ inline uint32_t RowGroup::getRowSizeWithStrings() const
inline RGDataSizeType RowGroup::getSizeWithStrings(uint64_t n) const
{
if (strings == nullptr)
return getDataSize(n);
else
return getDataSize(n) + strings->getSize();
RGDataSizeType ret = getDataSize(n);
if (strings)
{
ret += strings->getSize();
}
if (aggregateDataStore)
{
ret += aggregateDataStore->getDataSize();
}
return ret;
}
inline uint64_t RowGroup::getSizeWithStrings() const
@ -2216,7 +2316,18 @@ inline void RGData::getRow(uint32_t num, Row* row)
idbassert(columnCount == row->getColumnCount() && rowSize == incomingRowSize);
row->setData(
Row::Pointer(&rowData[RowGroup::getHeaderSize() + (num * incomingRowSize)], strings.get(), userDataStore.get()));
Row::Pointer(&rowData[RowGroup::getHeaderSize() + (num * incomingRowSize)], strings.get(),
userDataStore.get(), aggregateDataStore.get()));
}
// Combines a group id and a row id within that group into one flat index,
// given maxRows rows per group: gid * maxRows + rid.
inline uint64_t rowGidRidToIdx(uint64_t gid, uint32_t rid, uint32_t maxRows)
{
  const uint64_t firstRowOfGroup = gid * maxRows;
  return firstRowOfGroup + rid;
}
// Splits a flat index into {group id, row id within the group}, given
// maxRows rows per group: {idx / maxRows, idx % maxRows}.
inline std::pair<uint64_t, uint64_t> rowIdxToGidRid(uint64_t idx, uint32_t maxRows)
{
  const uint64_t gid = idx / maxRows;
  const uint64_t rid = idx % maxRows;
  return std::make_pair(gid, rid);
}
} // namespace rowgroup

View File

@ -584,6 +584,7 @@ class RowGroupStorage
, fUniqId(this)
, fTmpDir(tmpDir)
, fCompressor(compressor)
, fUseDisk(!strict)
{
if (rm)
{
@ -698,7 +699,7 @@ class RowGroupStorage
logging::ERR_AGGREGATION_TOO_BIG);
}
if (fMM->getFree() < memSz * 2)
if (fUseDisk && fMM->getFree() < memSz * 2)
{
saveRG(rgid);
fRGDatas[rgid].reset();
@ -880,8 +881,7 @@ class RowGroupStorage
*/
void getRow(uint64_t idx, Row& row)
{
uint64_t rgid = idx / fMaxRows;
uint64_t rid = idx % fMaxRows;
auto [rgid, rid] = rowIdxToGidRid(idx, fMaxRows);
if (UNLIKELY(!fRGDatas[rgid]))
{
loadRG(rgid);
@ -947,7 +947,7 @@ class RowGroupStorage
}
fLRU->add(fCurRgid);
idx = fCurRgid * fMaxRows + fRowGroupOut->getRowCount();
idx = rowGidRidToIdx(fCurRgid, fRowGroupOut->getRowCount(), fMaxRows);
fRowGroupOut->getRow(fRowGroupOut->getRowCount(), &row);
fRowGroupOut->incRowCount();
}
@ -962,7 +962,7 @@ class RowGroupStorage
*/
void putKeyRow(uint64_t idx, Row& row)
{
uint64_t rgid = idx / fMaxRows;
auto [rgid, rid] = rowIdxToGidRid(idx, fMaxRows);
while (rgid >= fRGDatas.size())
{
@ -1157,6 +1157,7 @@ class RowGroupStorage
ret->fGeneration = gen;
ret->fCompressor = fCompressor;
ret->fDumper.reset(new Dumper(fCompressor, fMM.get()));
ret->fUseDisk = fUseDisk;
ret->loadFinalizedInfo();
return ret;
}
@ -1165,8 +1166,7 @@ class RowGroupStorage
*/
void markFinalized(uint64_t idx)
{
uint64_t gid = idx / 64;
uint64_t rid = idx % 64;
auto [gid, rid] = rowIdxToGidRid(idx, 64);
if (LIKELY(fFinalizedRows.size() <= gid))
fFinalizedRows.resize(gid + 1, 0ULL);
@ -1176,8 +1176,7 @@ class RowGroupStorage
/** @brief Check if row at specified index was finalized earlier */
bool isFinalized(uint64_t idx) const
{
uint64_t gid = idx / 64;
uint64_t rid = idx % 64;
auto [gid, rid] = rowIdxToGidRid(idx, 64);
if (LIKELY(fFinalizedRows.size() <= gid))
return false;
@ -1324,6 +1323,7 @@ class RowGroupStorage
unlink(fname.c_str());
rgdata.reset(new RGData());
rgdata->deserialize(bs, fRowGroupOut->getDataSize(fMaxRows));
assert(bs.length() == 0);
fRowGroupOut->setData(rgdata.get());
auto memSz = fRowGroupOut->getSizeWithStrings(fMaxRows);
@ -1379,12 +1379,12 @@ class RowGroupStorage
fRowGroupOut->serialize(bs);
char buf[1024];
snprintf(buf, sizeof(buf), "/tmp/kemm/META-p%u-t%p", getpid(), fUniqPtr);
snprintf(buf, sizeof(buf), "%s/META-p%u-t%p", fTmpDir.c_str(), getpid(), fUniqId);
int fd = open(buf, O_WRONLY | O_TRUNC | O_CREAT, 0644);
assert(fd >= 0);
auto r = write(fd, bs.buf(), bs.length());
assert(r == bs.length());
assert(size_t(r) == bs.length());
close(fd);
}
#endif
@ -1421,6 +1421,7 @@ class RowGroupStorage
std::string fTmpDir;
compress::CompressInterface* fCompressor;
std::unique_ptr<Dumper> fDumper;
bool fUseDisk;
};
/** @brief Internal data for the hashmap */

View File

@ -526,11 +526,11 @@ int TimeCompare::operator()(IdbCompare* l, Row::Pointer r1, Row::Pointer r2)
return ret;
}
bool CompareRule::less(Row::Pointer r1, Row::Pointer r2)
bool CompareRule::less(Row::Pointer r1, Row::Pointer r2) const
{
for (auto& compare : fCompares)
for (auto* compare : fCompares)
{
int c = ((*compare)(fIdbCompare, r1, r2));
int c = (*compare)(fIdbCompare, r1, r2);
if (c < 0)
return true;

View File

@ -316,7 +316,7 @@ class CompareRule
{
}
bool less(rowgroup::Row::Pointer r1, rowgroup::Row::Pointer r2);
bool less(rowgroup::Row::Pointer r1, rowgroup::Row::Pointer r2) const;
void compileRules(const std::vector<IdbSortSpec>&, const rowgroup::RowGroup&);
void revertRules();