diff --git a/dbcon/execplan/CMakeLists.txt b/dbcon/execplan/CMakeLists.txt
index e13609f91..d65503a1e 100755
--- a/dbcon/execplan/CMakeLists.txt
+++ b/dbcon/execplan/CMakeLists.txt
@@ -22,7 +22,6 @@ set(execplan_LIB_SRCS
     functioncolumn.cpp
     groupconcatcolumn.cpp
     intervalcolumn.cpp
-    jsonarrayaggcolumn.cpp
     logicoperator.cpp
     mysqlexecutionplan.cpp
     objectidmanager.cpp
diff --git a/dbcon/execplan/groupconcatcolumn.cpp b/dbcon/execplan/groupconcatcolumn.cpp
index 25800ce5c..f6770acbe 100644
--- a/dbcon/execplan/groupconcatcolumn.cpp
+++ b/dbcon/execplan/groupconcatcolumn.cpp
@@ -41,11 +41,12 @@ namespace execplan
 /**
  * Constructors/Destructors
  */
-GroupConcatColumn::GroupConcatColumn() : AggregateColumn()
+GroupConcatColumn::GroupConcatColumn(bool isJsonArrayAgg) : AggregateColumn(), fIsJsonArrayAgg(isJsonArrayAgg)
 {
 }
 
-GroupConcatColumn::GroupConcatColumn(const uint32_t sessionID) : AggregateColumn(sessionID)
+GroupConcatColumn::GroupConcatColumn(const uint32_t sessionID, bool isJsonArrayAgg)
+  : AggregateColumn(sessionID), fIsJsonArrayAgg(isJsonArrayAgg)
 {
 }
 
@@ -53,10 +54,7 @@ GroupConcatColumn::GroupConcatColumn(const GroupConcatColumn& rhs, const uint32_
   : AggregateColumn(dynamic_cast<const AggregateColumn&>(rhs))
   , fOrderCols(rhs.fOrderCols)
   , fSeparator(rhs.fSeparator)
-{
-}
-
-GroupConcatColumn::~GroupConcatColumn()
+  , fIsJsonArrayAgg(rhs.fIsJsonArrayAgg)
 {
 }
 
@@ -67,16 +65,26 @@ GroupConcatColumn::~GroupConcatColumn()
 const string GroupConcatColumn::toString() const
 {
   ostringstream output;
-  output << "GroupConcatColumn " << data() << endl;
-  output << AggregateColumn::toString() << endl;
-  output << "Group Concat Order Columns: " << endl;
+  if (fIsJsonArrayAgg)
+  {
+    output << "JsonArrayAggColumn " << data() << endl;
+    output << AggregateColumn::toString() << endl;
+    output << "Json Array Order Columns: " << endl;
+  }
+  else
+  {
+    output << "GroupConcatColumn " << data() << endl;
+    output << AggregateColumn::toString() << endl;
+    output << "Group Concat Order Columns: " << endl;
+  }
 
   for (uint32_t i = 0; i < fOrderCols.size(); i++)
   {
     output << *fOrderCols[i];
   }
 
-  output << "\nSeparator: " << fSeparator << endl;
+  if (!fIsJsonArrayAgg)
+    output << "\nSeparator: " << fSeparator << endl;
 
   return output.str();
 }
 
@@ -84,7 +92,7 @@ string GroupConcatColumn::toCppCode(IncludeSet& includes) const
 {
   includes.insert("groupconcatcolumn.h");
   stringstream ss;
-  ss << "GroupConcatColumn(" << sessionID() << ")";
+  ss << "GroupConcatColumn(" << sessionID() << "," << std::boolalpha << fIsJsonArrayAgg << ")";
 
   return ss.str();
 }
 
@@ -100,13 +108,13 @@ void GroupConcatColumn::serialize(messageqcpp::ByteStream& b) const
   b << (uint8_t)ObjectReader::GROUPCONCATCOLUMN;
   AggregateColumn::serialize(b);
 
-  CalpontSelectExecutionPlan::ReturnedColumnList::const_iterator rcit;
   b << static_cast<uint32_t>(fOrderCols.size());
 
-  for (rcit = fOrderCols.begin(); rcit != fOrderCols.end(); ++rcit)
-    (*rcit)->serialize(b);
+  for (const auto& col : fOrderCols)
+    col->serialize(b);
 
   b << fSeparator;
+  b << (uint8_t)fIsJsonArrayAgg;
 }
 
 void GroupConcatColumn::unserialize(messageqcpp::ByteStream& b)
@@ -127,6 +135,9 @@ void GroupConcatColumn::unserialize(messageqcpp::ByteStream& b)
   }
 
   b >> fSeparator;
+  uint8_t tmp8;
+  b >> tmp8;
+  fIsJsonArrayAgg = tmp8;
 }
 
 bool GroupConcatColumn::operator==(const GroupConcatColumn& t) const
@@ -156,6 +167,9 @@ bool GroupConcatColumn::operator==(const GroupConcatColumn& t) const
   if (fSeparator != t.fSeparator)
     return false;
 
+  if (fIsJsonArrayAgg != t.fIsJsonArrayAgg)
+    return false;
+
   return true;
 }
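For context, a minimal usage sketch of the merged column type (not part of the patch): GROUP_CONCAT and JSON_ARRAYAGG now share GroupConcatColumn, distinguished only by the new isJsonArrayAgg flag, and the flag is appended after fSeparator by serialize() so it survives the ByteStream round trip. The function name sketchMergedColumnUsage and the session id value are illustrative; the sketch assumes the ColumnStore build environment so that the project headers resolve.

// Illustrative sketch only (not part of the patch); assumes the ColumnStore
// build tree so that groupconcatcolumn.h and bytestream.h resolve.
#include "groupconcatcolumn.h"
#include "bytestream.h"

void sketchMergedColumnUsage()
{
  const uint32_t sessionID = 1;  // hypothetical session id

  // GROUP_CONCAT keeps the old behaviour (flag defaults to false);
  // JSON_ARRAYAGG reuses the same class with the flag set.
  execplan::GroupConcatColumn groupConcat(sessionID, /*isJsonArrayAgg=*/false);
  execplan::GroupConcatColumn jsonArrayAgg(sessionID, /*isJsonArrayAgg=*/true);

  // serialize() writes the flag after fSeparator and unserialize() reads it
  // back, so the mode is preserved when the plan is shipped between nodes.
  messageqcpp::ByteStream bs;
  jsonArrayAgg.serialize(bs);

  execplan::GroupConcatColumn restored;
  restored.unserialize(bs);  // restored.toString() reports "JsonArrayAggColumn ..."
}
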
diff --git a/dbcon/execplan/groupconcatcolumn.h b/dbcon/execplan/groupconcatcolumn.h
index 70bc6b660..55c391f45 100644
--- a/dbcon/execplan/groupconcatcolumn.h
+++ b/dbcon/execplan/groupconcatcolumn.h
@@ -50,16 +50,16 @@ class GroupConcatColumn : public AggregateColumn
   /**
    * Constructors
    */
-  GroupConcatColumn();
+  explicit GroupConcatColumn(bool isJsonArrayAgg = false);
 
-  explicit GroupConcatColumn(const uint32_t sessionID);
+  explicit GroupConcatColumn(const uint32_t sessionID, bool isJsonArrayAgg = false);
 
   GroupConcatColumn(const GroupConcatColumn& rhs, const uint32_t sessionID = 0);
 
   /**
    * Destructors
    */
-  ~GroupConcatColumn() override;
+  ~GroupConcatColumn() override = default;
 
   /**
    * Overloaded stream operator
@@ -140,6 +140,7 @@ class GroupConcatColumn : public AggregateColumn
  private:
   std::vector<SRCP> fOrderCols;
   std::string fSeparator;
+  bool fIsJsonArrayAgg{false};
 };
 
 /**
diff --git a/dbcon/execplan/jsonarrayaggcolumn.cpp b/dbcon/execplan/jsonarrayaggcolumn.cpp
deleted file mode 100644
index 5181394fd..000000000
--- a/dbcon/execplan/jsonarrayaggcolumn.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-/* Copyright (C) 2022 MariaDB Corporation
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License
-   as published by the Free Software Foundation; version 2 of
-   the License.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
-   MA 02110-1301, USA.
*/ - -#include -#include - -using namespace std; - -#include "bytestream.h" -using namespace messageqcpp; - -#include "rowgroup.h" -using namespace rowgroup; - -#include "joblisttypes.h" -using namespace joblist; - -#include "simplefilter.h" -#include "constantfilter.h" -#include "arithmeticcolumn.h" -#include "functioncolumn.h" -#include "objectreader.h" -#include "jsonarrayaggcolumn.h" - -namespace execplan -{ -/** - * Constructors/Destructors - */ -JsonArrayAggColumn::JsonArrayAggColumn() : AggregateColumn() -{ -} - -JsonArrayAggColumn::JsonArrayAggColumn(const uint32_t sessionID) : AggregateColumn(sessionID) -{ -} - -JsonArrayAggColumn::JsonArrayAggColumn(const JsonArrayAggColumn& rhs, const uint32_t sessionID) - : AggregateColumn(dynamic_cast(rhs)) - , fOrderCols(rhs.fOrderCols) - , fSeparator(rhs.fSeparator) -{ -} - -JsonArrayAggColumn::~JsonArrayAggColumn() -{ -} - -/** - * Methods - */ - -const string JsonArrayAggColumn::toString() const -{ - ostringstream output; - output << "JsonArrayAggColumn " << data() << endl; - output << AggregateColumn::toString() << endl; - output << "Json Array Order Columns: " << endl; - - for (uint32_t i = 0; i < fOrderCols.size(); i++) - { - output << *fOrderCols[i]; - } - - return output.str(); -} - -ostream& operator<<(ostream& output, const JsonArrayAggColumn& rhs) -{ - output << rhs.toString(); - return output; -} - -void JsonArrayAggColumn::serialize(messageqcpp::ByteStream& b) const -{ - b << (uint8_t)ObjectReader::GROUPCONCATCOLUMN; - AggregateColumn::serialize(b); - - CalpontSelectExecutionPlan::ReturnedColumnList::const_iterator rcit; - b << static_cast(fOrderCols.size()); - - for (rcit = fOrderCols.begin(); rcit != fOrderCols.end(); ++rcit) - (*rcit)->serialize(b); - - b << fSeparator; -} - -void JsonArrayAggColumn::unserialize(messageqcpp::ByteStream& b) -{ - ObjectReader::checkType(b, ObjectReader::GROUPCONCATCOLUMN); - AggregateColumn::unserialize(b); - fOrderCols.erase(fOrderCols.begin(), fOrderCols.end()); - - uint32_t size, i; - ReturnedColumn* rc; - b >> size; - - for (i = 0; i < size; i++) - { - rc = dynamic_cast(ObjectReader::createTreeNode(b)); - SRCP srcp(rc); - fOrderCols.push_back(srcp); - } - - b >> fSeparator; -} - -bool JsonArrayAggColumn::operator==(const JsonArrayAggColumn& t) const -{ - const AggregateColumn *rc1, *rc2; - - rc1 = static_cast(this); - rc2 = static_cast(&t); - - if (*rc1 != *rc2) - return false; - - for (uint32_t i = 0; i < fOrderCols.size(); i++) - { - if (fOrderCols[i].get() != NULL) - { - if (t.fOrderCols[i] == NULL) - return false; - - if (*(fOrderCols[i].get()) != t.fOrderCols[i].get()) - return false; - } - else if (t.fOrderCols[i].get() != NULL) - return false; - } - - return true; -} - -bool JsonArrayAggColumn::operator==(const TreeNode* t) const -{ - const JsonArrayAggColumn* ac; - - ac = dynamic_cast(t); - - if (ac == NULL) - return false; - - return *this == *ac; -} - -bool JsonArrayAggColumn::operator!=(const JsonArrayAggColumn& t) const -{ - return !(*this == t); -} - -bool JsonArrayAggColumn::operator!=(const TreeNode* t) const -{ - return !(*this == t); -} - -string JsonArrayAggColumn::toCppCode(IncludeSet& includes) const -{ - includes.insert("jsonarrayaggcolumn.h"); - stringstream ss; - ss << "JsonArrayAggColumn(" << sessionID() << ")"; - - return ss.str(); -} - -} // namespace execplan diff --git a/dbcon/execplan/jsonarrayaggcolumn.h b/dbcon/execplan/jsonarrayaggcolumn.h deleted file mode 100644 index edafd3d0b..000000000 --- a/dbcon/execplan/jsonarrayaggcolumn.h +++ /dev/null @@ -1,145 +0,0 
@@ -/* Copyright (C) 2022 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/** @file */ - -#pragma once -#include - -#include "calpontselectexecutionplan.h" -#include "aggregatecolumn.h" - -namespace messageqcpp -{ -class ByteStream; -} - -/** - * Namespace - */ -namespace execplan -{ -/** - * @brief A class to represent a aggregate return column - * - * This class is a specialization of class ReturnedColumn that - * handles an aggregate function call (e.g., SUM, COUNT, MIN, MAX). - */ -class JsonArrayAggColumn : public AggregateColumn -{ - public: - /** - * Constructors - */ - JsonArrayAggColumn(); - - explicit JsonArrayAggColumn(const uint32_t sessionID); - - JsonArrayAggColumn(const JsonArrayAggColumn& rhs, const uint32_t sessionID = 0); - - /** - * Destructors - */ - ~JsonArrayAggColumn() override; - - /** - * Overloaded stream operator - */ - const std::string toString() const override; - - /** return a copy of this pointer - * - * deep copy of this pointer and return the copy - */ - JsonArrayAggColumn* clone() const override - { - return new JsonArrayAggColumn(*this); - } - - /** - * Accessors and Mutators - */ - void orderCols(const std::vector& orderCols) - { - fOrderCols = orderCols; - } - std::vector& orderCols() - { - return fOrderCols; - } - void separator(const std::string& separator) - { - fSeparator = separator; - } - std::string& separator() - { - return fSeparator; - } - - /** - * Serialize interface - */ - void serialize(messageqcpp::ByteStream&) const override; - void unserialize(messageqcpp::ByteStream&) override; - - /** @brief Do a deep, strict (as opposed to semantic) equivalence test - * - * Do a deep, strict (as opposed to semantic) equivalence test. - * @return true iff every member of t is a duplicate copy of every member of this; - * false otherwise - */ - bool operator==(const TreeNode* t) const override; - - /** @brief Do a deep, strict (as opposed to semantic) equivalence test - * - * Do a deep, strict (as opposed to semantic) equivalence test. - * @return true iff every member of t is a duplicate copy of every member of this; - * false otherwise - */ - using AggregateColumn::operator==; - virtual bool operator==(const JsonArrayAggColumn& t) const; - - /** @brief Do a deep, strict (as opposed to semantic) equivalence test - * - * Do a deep, strict (as opposed to semantic) equivalence test. - * @return false iff every member of t is a duplicate copy of every member of this; - * true otherwise - */ - bool operator!=(const TreeNode* t) const override; - - /** @brief Do a deep, strict (as opposed to semantic) equivalence test - * - * Do a deep, strict (as opposed to semantic) equivalence test. 
- * @return false iff every member of t is a duplicate copy of every member of this; - * true otherwise - */ - using AggregateColumn::operator!=; - virtual bool operator!=(const JsonArrayAggColumn& t) const; - - std::string toCppCode(IncludeSet& includes) const override; - - private: - std::vector fOrderCols; - std::string fSeparator; -}; - -/** - * stream operator - */ -std::ostream& operator<<(std::ostream& os, const JsonArrayAggColumn& rhs); - -} // namespace execplan diff --git a/dbcon/joblist/CMakeLists.txt b/dbcon/joblist/CMakeLists.txt index 6499a47b0..aabf31c48 100644 --- a/dbcon/joblist/CMakeLists.txt +++ b/dbcon/joblist/CMakeLists.txt @@ -25,7 +25,6 @@ set(joblist_LIB_SRCS joblistfactory.cpp jobstep.cpp jobstepassociation.cpp - jsonarrayagg.cpp lbidlist.cpp limitedorderby.cpp passthrucommand-jl.cpp diff --git a/dbcon/joblist/groupconcat.cpp b/dbcon/joblist/groupconcat.cpp index b67623e9d..a61627b2d 100644 --- a/dbcon/joblist/groupconcat.cpp +++ b/dbcon/joblist/groupconcat.cpp @@ -18,15 +18,16 @@ // $Id: groupconcat.cpp 9705 2013-07-17 20:06:07Z pleblanc $ +#include #include // #define NDEBUG #include +#include #include +#include "windowfunction/idborderby.h" using namespace std; - #include "errorids.h" -#include "exceptclasses.h" using namespace logging; #include "returnedcolumn.h" @@ -52,35 +53,28 @@ using namespace ordering; #include "jobstep.h" #include "jlf_common.h" -#include "limitedorderby.h" #include "mcs_decimal.h" +#include "utils/json/json.hpp" +using namespace nlohmann; + namespace joblist { // GroupConcatInfo class implementation -GroupConcatInfo::GroupConcatInfo() -{ -} - -GroupConcatInfo::~GroupConcatInfo() -{ -} +GroupConcatInfo::GroupConcatInfo() = default; +GroupConcatInfo::~GroupConcatInfo() = default; void GroupConcatInfo::prepGroupConcat(JobInfo& jobInfo) { - RetColsVector::iterator i = jobInfo.groupConcatCols.begin(); - - while (i != jobInfo.groupConcatCols.end()) + for (const auto& gccol : jobInfo.groupConcatCols) { - GroupConcatColumn* gcc = dynamic_cast(i->get()); - const RowColumn* rcp = dynamic_cast(gcc->aggParms()[0].get()); + auto* gcc = dynamic_cast(gccol.get()); + const auto* rcp = dynamic_cast(gcc->aggParms()[0].get()); - SP_GroupConcat groupConcat(new GroupConcat); + SP_GroupConcat groupConcat(new GroupConcat(jobInfo.rm, jobInfo.umMemLimit)); groupConcat->fSeparator = gcc->separator(); groupConcat->fDistinct = gcc->distinct(); groupConcat->fSize = gcc->resultType().colWidth; - groupConcat->fRm = jobInfo.rm; - groupConcat->fSessionMemLimit = jobInfo.umMemLimit; groupConcat->fTimeZone = jobInfo.timeZone; int key = -1; @@ -88,46 +82,44 @@ void GroupConcatInfo::prepGroupConcat(JobInfo& jobInfo) for (uint64_t j = 0, k = 0; j < cols.size(); j++) { - const ConstantColumn* cc = dynamic_cast(cols[j].get()); + const auto* cc = dynamic_cast(cols[j].get()); - if (cc == NULL) + if (cc == nullptr) { key = getColumnKey(cols[j], jobInfo); fColumns.insert(key); - groupConcat->fGroupCols.push_back(make_pair(key, k++)); + groupConcat->fGroupCols.emplace_back(key, k++); } else { - groupConcat->fConstCols.push_back(make_pair(cc->constval(), j)); + groupConcat->fConstCols.emplace_back(cc->constval(), j); } } vector& orderCols = gcc->orderCols(); - for (vector::iterator k = orderCols.begin(); k != orderCols.end(); k++) + for (const auto& orderCol : orderCols) { - if (dynamic_cast(k->get()) != NULL) + if (dynamic_cast(orderCol.get()) != nullptr) continue; - key = getColumnKey(*k, jobInfo); + key = getColumnKey(orderCol, jobInfo); fColumns.insert(key); - 
groupConcat->fOrderCols.push_back(make_pair(key, k->get()->asc())); + groupConcat->fOrderCols.emplace_back(key, orderCol->asc()); } - fGroupConcat.push_back(groupConcat); - - i++; + groupConcat->id = fGroupConcat.size(); + fGroupConcat.emplace_back(std::move(groupConcat)); } // Rare case: all columns in group_concat are constant columns, use a column in column map. - if (jobInfo.groupConcatCols.size() > 0 && fColumns.size() == 0) + if (!jobInfo.groupConcatCols.empty() && fColumns.empty()) { int key = -1; - for (vector::iterator i = jobInfo.tableList.begin(); i != jobInfo.tableList.end() && key == -1; - i++) + for (auto i = jobInfo.tableList.begin(); i != jobInfo.tableList.end() && key == -1; ++i) { - if (jobInfo.columnMap[*i].size() > 0) + if (!jobInfo.columnMap[*i].empty()) { key = *(jobInfo.columnMap[*i].begin()); } @@ -144,12 +136,12 @@ void GroupConcatInfo::prepGroupConcat(JobInfo& jobInfo) } } -uint32_t GroupConcatInfo::getColumnKey(const SRCP& srcp, JobInfo& jobInfo) +uint32_t GroupConcatInfo::getColumnKey(const SRCP& srcp, JobInfo& jobInfo) const { int colKey = -1; - const SimpleColumn* sc = dynamic_cast(srcp.get()); + const auto* sc = dynamic_cast(srcp.get()); - if (sc != NULL) + if (sc != nullptr) { if (sc->schemaName().empty()) { @@ -169,17 +161,17 @@ uint32_t GroupConcatInfo::getColumnKey(const SRCP& srcp, JobInfo& jobInfo) } else { - const ArithmeticColumn* ac = dynamic_cast(srcp.get()); - const FunctionColumn* fc = dynamic_cast(srcp.get()); + const auto* ac = dynamic_cast(srcp.get()); + const auto* fc = dynamic_cast(srcp.get()); - if (ac != NULL || fc != NULL) + if (ac != nullptr || fc != nullptr) { colKey = getExpTupleKey(jobInfo, srcp->expressionId()); } else { - cerr << "Unsupported GROUP_CONCAT column. " << srcp->toString() << endl; - throw runtime_error("Unsupported GROUP_CONCAT column."); + cerr << "Unsupported GROUP_CONCAT/JSON_ARRAYAGG column. " << srcp->toString() << endl; + throw runtime_error("Unsupported GROUP_CONCAT/JSON_ARRAYAGG column."); } } @@ -194,7 +186,7 @@ void GroupConcatInfo::mapColumns(const RowGroup& projRG) for (uint64_t i = 0; i < projRG.getColumnCount(); i++) projColumnMap[keysProj[i]] = i; - for (vector::iterator k = fGroupConcat.begin(); k != fGroupConcat.end(); k++) + for (auto k = fGroupConcat.begin(); k != fGroupConcat.end(); k++) { vector pos; vector oids; @@ -205,7 +197,7 @@ void GroupConcatInfo::mapColumns(const RowGroup& projRG) vector csNums; pos.push_back(2); - vector >::iterator i1 = (*k)->fGroupCols.begin(); + auto i1 = (*k)->fGroupCols.begin(); while (i1 != (*k)->fGroupCols.end()) { @@ -213,7 +205,7 @@ void GroupConcatInfo::mapColumns(const RowGroup& projRG) if (j == projColumnMap.end()) { - cerr << "Concat Key:" << i1->first << " is not projected." << endl; + cerr << "Concat/ArrayAgg Key:" << i1->first << " is not projected." 
<< endl; throw runtime_error("Project error."); } @@ -225,14 +217,14 @@ void GroupConcatInfo::mapColumns(const RowGroup& projRG) scale.push_back(projRG.getScale()[j->second]); precision.push_back(projRG.getPrecision()[j->second]); - i1++; + ++i1; } - vector >::iterator i2 = (*k)->fOrderCols.begin(); + auto i2 = (*k)->fOrderCols.begin(); while (i2 != (*k)->fOrderCols.end()) { - map::iterator j = projColumnMap.find(i2->first); + auto j = projColumnMap.find(i2->first); if (j == projColumnMap.end()) { @@ -262,13 +254,13 @@ void GroupConcatInfo::mapColumns(const RowGroup& projRG) (*k)->fOrderCond.push_back(make_pair(idx, i2->second)); - i2++; + ++i2; } (*k)->fRowGroup = RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, projRG.getStringTableThreshold(), false); - // MCOL-5429 Use stringstore if the datatype of the groupconcat + // MCOL-5429/MCOL-5491 Use stringstore if the datatype of the groupconcat/json_arrayagg // field is a long string. if ((*k)->fRowGroup.hasLongString()) { @@ -279,7 +271,7 @@ void GroupConcatInfo::mapColumns(const RowGroup& projRG) } } -std::shared_ptr GroupConcatInfo::makeMapping(const RowGroup& in, const RowGroup& out) +std::shared_ptr GroupConcatInfo::makeMapping(const RowGroup& in, const RowGroup& out) const { // For some reason using the rowgroup mapping fcns don't work completely right in this class std::shared_ptr mapping(new int[out.getColumnCount()]); @@ -308,25 +300,24 @@ const string GroupConcatInfo::toString() const return oss.str(); } -GroupConcatAgUM::GroupConcatAgUM(rowgroup::SP_GroupConcat& gcc) : GroupConcatAg(gcc) +GroupConcatAg::GroupConcatAg(rowgroup::SP_GroupConcat& gcc, bool isJsonArrayAgg) + : fGroupConcat(gcc), fIsJsonArrayAgg(isJsonArrayAgg) { initialize(); } -GroupConcatAgUM::~GroupConcatAgUM() -{ -} +GroupConcatAg::~GroupConcatAg() = default; -void GroupConcatAgUM::initialize() +void GroupConcatAg::initialize() { if (fGroupConcat->fDistinct || fGroupConcat->fOrderCols.size() > 0) - fConcator.reset(new GroupConcatOrderBy()); + fConcator.reset(new GroupConcatOrderBy(fIsJsonArrayAgg)); else - fConcator.reset(new GroupConcatNoOrder()); + fConcator.reset(new GroupConcatNoOrder(fIsJsonArrayAgg)); fConcator->initialize(fGroupConcat); - // MCOL-5429 Use stringstore if the datatype of the groupconcat + // MCOL-5429/MCOL-5491 Use stringstore if the datatype of the group_concat/json_arrayagg // field is a long string. 
if (fGroupConcat->fRowGroup.hasLongString()) { @@ -338,35 +329,91 @@ void GroupConcatAgUM::initialize() fRowGroup.resetRowGroup(0); fRowGroup.initRow(&fRow); fRowGroup.getRow(0, &fRow); + fMemSize = fRowGroup.getSizeWithStrings(1); } else { fGroupConcat->fRowGroup.initRow(&fRow, true); fData.reset(new uint8_t[fRow.getSize()]); fRow.setData(rowgroup::Row::Pointer(fData.get())); + fMemSize = fRow.getSize(); } } -void GroupConcatAgUM::processRow(const rowgroup::Row& inRow) +void GroupConcatAg::processRow(const rowgroup::Row& inRow) { applyMapping(fGroupConcat->fMapping, inRow); fConcator->processRow(fRow); } -void GroupConcatAgUM::merge(const rowgroup::Row& inRow, int64_t i) +void GroupConcatAg::merge(const rowgroup::Row& inRow, uint64_t i) { - uint8_t* data = inRow.getData(); - joblist::GroupConcatAgUM* gccAg = *((joblist::GroupConcatAgUM**)(data + inRow.getOffset(i))); - + auto* gccAg = dynamic_cast(inRow.getAggregateData(i)); fConcator->merge(gccAg->concator().get()); } -uint8_t* GroupConcatAgUM::getResult() +uint8_t* GroupConcatAg::getResult() { return fConcator->getResult(fGroupConcat->fSeparator); } -void GroupConcatAgUM::applyMapping(const std::shared_ptr& mapping, const Row& row) +void GroupConcatAg::serialize(messageqcpp::ByteStream& bs) const +{ + bs << (uint8_t)fIsJsonArrayAgg; + fGroupConcat->serialize(bs); + fConcator->serialize(bs); + if (fRowGroup.hasLongString()) + { + bs << uint8_t(1); + fRowRGData.serialize(bs, fRowGroup.getDataSize(1)); + } + else + { + bs << uint8_t(0); + bs.append(fData.get(), fRow.getSize()); + } +} + +void GroupConcatAg::deserialize(messageqcpp::ByteStream& bs) +{ + uint8_t tmp8; + bs >> tmp8; + fIsJsonArrayAgg = tmp8; + fGroupConcat->deserialize(bs); + if (fGroupConcat->fDistinct || !fGroupConcat->fOrderCols.empty()) + { + fConcator.reset(new GroupConcatOrderBy(fIsJsonArrayAgg)); + } + else + { + fConcator.reset(new GroupConcatNoOrder(fIsJsonArrayAgg)); + } + fConcator->initialize(fGroupConcat); + fConcator->deserialize(bs); + bs >> tmp8; + if (tmp8) + { + fRowRGData.deserialize(bs, fRow.getSize()); + fRowGroup.setData(&fRowRGData); + fRowGroup.initRow(&fRow); + } + else + { + RGDataSizeType size; + bs >> size; + fData.reset(new uint8_t[size]); + memcpy(fData.get(), bs.buf(), size); + bs.advance(size); + fRow.setData(rowgroup::Row::Pointer(fData.get())); + } +} + +rowgroup::RGDataSizeType GroupConcatAg::getDataSize() const +{ + return fMemSize + fConcator->getDataSize(); +} + +void GroupConcatAg::applyMapping(const std::shared_ptr& mapping, const Row& row) { // For some reason the rowgroup mapping fcns don't work right in this class. 
for (uint64_t i = 0; i < fRow.getColumnCount(); i++) @@ -377,6 +424,7 @@ void GroupConcatAgUM::applyMapping(const std::shared_ptr& mapping, const fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::VARCHAR || fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT) { + // TODO: free previous string if it is in the StringStorage fRow.setStringField(row.getConstString(mapping[i]), i); } else if (fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::LONGDOUBLE) @@ -404,28 +452,25 @@ void GroupConcatAgUM::applyMapping(const std::shared_ptr& mapping, const } // GroupConcator class implementation -GroupConcator::GroupConcator() : fCurrentLength(0), fGroupConcatLen(0), fConstantLen(0) -{ -} - -GroupConcator::~GroupConcator() -{ -} - void GroupConcator::initialize(const rowgroup::SP_GroupConcat& gcc) { // MCOL-901 This value comes from the Server and it is // too high(1MB or 3MB by default) to allocate it for every instance. fGroupConcatLen = gcc->fSize; size_t sepSize = gcc->fSeparator.size(); - fCurrentLength -= sepSize; // XXX Yet I have to find out why spearator has c_str() as nullptr here. + fCurrentLength -= sepSize; // XXX Yet I have to find out why spearator has c_str() as nullptr here. fTimeZone = gcc->fTimeZone; fConstCols = gcc->fConstCols; fConstantLen = sepSize; - for (uint64_t i = 0; i < fConstCols.size(); i++) - fConstantLen += strlen(fConstCols[i].first.str()); + fRm = gcc->fRm; + fSessionMemLimit = gcc->fSessionMemLimit; + + for (const auto& str : views::keys(gcc->fConstCols)) + { + fConstantLen += str.length(); + } } void GroupConcator::outputRow(std::ostringstream& oss, const rowgroup::Row& row) @@ -494,7 +539,23 @@ void GroupConcator::outputRow(std::ostringstream& oss, const rowgroup::Row& row) case CalpontSystemCatalog::VARCHAR: case CalpontSystemCatalog::TEXT: { - oss << row.getStringField(*i).str(); + if (fIsJsonArrayAgg) + { + auto maybeJson = row.getStringField(*i).safeString(""); // XXX: MULL??? it is not checked anywhere. 
+ const auto j = json::parse(maybeJson, nullptr, false); + if (j.is_discarded()) + { + oss << std::quoted(maybeJson.c_str()); + } + else + { + oss << maybeJson.c_str(); + } + } + else + { + oss << row.getStringField(*i).str(); + } break; } @@ -520,25 +581,37 @@ void GroupConcator::outputRow(std::ostringstream& oss, const rowgroup::Row& row) case CalpontSystemCatalog::DATE: { - oss << DataConvert::dateToString(row.getUintField(*i)); + if (fIsJsonArrayAgg) + oss << std::quoted(DataConvert::dateToString(row.getUintField(*i))); + else + oss << DataConvert::dateToString(row.getUintField(*i)); break; } case CalpontSystemCatalog::DATETIME: { - oss << DataConvert::datetimeToString(row.getUintField(*i)); + if (fIsJsonArrayAgg) + oss << std::quoted(DataConvert::datetimeToString(row.getUintField(*i))); + else + oss << DataConvert::datetimeToString(row.getUintField(*i)); break; } case CalpontSystemCatalog::TIMESTAMP: { - oss << DataConvert::timestampToString(row.getUintField(*i), fTimeZone); + if (fIsJsonArrayAgg) + oss << std::quoted(DataConvert::timestampToString(row.getUintField(*i), fTimeZone)); + else + oss << DataConvert::timestampToString(row.getUintField(*i), fTimeZone); break; } case CalpontSystemCatalog::TIME: { - oss << DataConvert::timeToString(row.getUintField(*i)); + if (fIsJsonArrayAgg) + oss << std::quoted(DataConvert::timeToString(row.getUintField(*i))); + else + oss << DataConvert::timeToString(row.getUintField(*i)); break; } @@ -554,18 +627,7 @@ void GroupConcator::outputRow(std::ostringstream& oss, const rowgroup::Row& row) bool GroupConcator::concatColIsNull(const rowgroup::Row& row) { - bool ret = false; - - for (vector::iterator i = fConcatColumns.begin(); i != fConcatColumns.end(); i++) - { - if (row.isNullValue(*i)) - { - ret = true; - break; - } - } - - return ret; + return ranges::any_of(fConcatColumns, [&](uint32_t idx) { return row.isNullValue(idx); }); } int64_t GroupConcator::lengthEstimate(const rowgroup::Row& row) @@ -717,40 +779,156 @@ const string GroupConcator::toString() const return oss.str(); } +void GroupConcator::serialize(messageqcpp::ByteStream& bs) const +{ + messageqcpp::serializeInlineVector(bs, fConcatColumns); + RGDataSizeType size = fConstCols.size(); + bs << size; + for (const auto& [k, v] : fConstCols) + { + bs << k; + bs << v; + } + bs << fCurrentLength; + bs << fGroupConcatLen; + bs << fConstantLen; + bs << fTimeZone; +} + +void GroupConcator::deserialize(messageqcpp::ByteStream& bs) +{ + fConstCols.clear(); + messageqcpp::deserializeInlineVector(bs, fConcatColumns); + RGDataSizeType size; + bs >> size; + fConstCols.reserve(size); + for (RGDataSizeType i = 0; i < size; i++) + { + NullString f; + bs >> f; + uint32_t s; + bs >> s; + fConstCols.emplace_back(f, s); + } + bs >> fCurrentLength; + bs >> fGroupConcatLen; + bs >> fConstantLen; + bs >> fTimeZone; +} + +class GroupConcatOrderByRow +{ + public: + GroupConcatOrderByRow(const rowgroup::Row& r, uint64_t rowIdx, ordering::CompareRule& c) + : fData(r.getPointer()), fIdx(rowIdx), fRule(&c) + { + } + bool operator<(const GroupConcatOrderByRow& rhs) const + { + return fRule->less(fData, rhs.fData); + } + rowgroup::Row::Pointer fData; + uint64_t fIdx; + ordering::CompareRule* fRule; +}; + +class GroupConcatOrderBy::SortingPQ : public priority_queue, less> +{ + public: + using BaseType = std::priority_queue, less>; + using size_type = BaseType::size_type; + + SortingPQ(size_type capacity) : BaseType() + { + reserve(capacity); + } + + SortingPQ(const container_type& v) : BaseType(less(), v) + { + } + + 
void reserve(size_type capacity) + { + this->c.reserve(capacity); + } + + size_type capacity() const + { + return this->c.capacity(); + } + + container_type::const_iterator begin() const + { + return this->c.begin(); + } + container_type::const_iterator end() const + { + return this->c.end(); + } + + using BaseType::empty; + using BaseType::pop; + using BaseType::push; + using BaseType::size; + using BaseType::top; +}; + // GroupConcatOrderBy class implementation -GroupConcatOrderBy::GroupConcatOrderBy() +GroupConcatOrderBy::GroupConcatOrderBy(bool isJsonArrayAgg) : GroupConcator(isJsonArrayAgg) { fRule.fIdbCompare = this; } GroupConcatOrderBy::~GroupConcatOrderBy() { + // delete compare objects + for (auto& compare : fRule.fCompares) + { + delete compare; + compare = nullptr; + } + fRule.fCompares.clear(); } void GroupConcatOrderBy::initialize(const rowgroup::SP_GroupConcat& gcc) { + gcc->fRowGroup.setUseOnlyLongString(true); + ordering::IdbCompare::initialize(gcc->fRowGroup); GroupConcator::initialize(gcc); fOrderByCond.resize(0); - for (uint64_t i = 0; i < gcc->fOrderCond.size(); i++) - fOrderByCond.push_back(IdbSortSpec(gcc->fOrderCond[i].first, gcc->fOrderCond[i].second)); - - fDistinct = gcc->fDistinct; - fRowsPerRG = 128; - fErrorCode = ERR_AGGREGATION_TOO_BIG; - fRm = gcc->fRm; - fSessionMemLimit = gcc->fSessionMemLimit; - - vector >::iterator i = gcc->fGroupCols.begin(); - while (i != gcc->fGroupCols.end()) + for (const auto& [idx, asc] : gcc->fOrderCond) { - auto x = (*i).second; - fConcatColumns.push_back(x); - i++; + fOrderByCond.emplace_back(idx, asc); } - IdbOrderBy::initialize(gcc->fRowGroup); + fDistinct = gcc->fDistinct; + + for (uint32_t x : views::values(gcc->fGroupCols)) + { + fConcatColumns.emplace_back(x); + } + + auto size = fRowGroup.getSizeWithStrings(fRowsPerRG); + fMemSize += size; + RGDataUnPtr rgdata(new RGData(fRowGroup, fRowsPerRG)); + fRowGroup.setData(rgdata.get()); + fRowGroup.resetRowGroup((0)); + fRowGroup.initRow(&fRow0); + fRowGroup.getRow(0, &fRow0); + fDataVec.emplace_back(std::move(rgdata)); + + fRule.compileRules(fOrderByCond, fRowGroup); + + fRowGroup.initRow(&row1); + fRowGroup.initRow(&row2); + + if (fDistinct) + { + fDistinctMap.reset(new DistinctMap(10, Hasher(this, getKeyLength()), Eq(this, getKeyLength()))); + } + fOrderByQueue.reset(new SortingPQ(10)); } uint64_t GroupConcatOrderBy::getKeyLength() const @@ -759,6 +937,145 @@ uint64_t GroupConcatOrderBy::getKeyLength() const return fConcatColumns.size(); // cols 0 to fConcatColumns.size() - 1 will be compared } +void GroupConcatOrderBy::serialize(messageqcpp::ByteStream& bs) const +{ + GroupConcator::serialize(bs); + uint64_t sz = fOrderByCond.size(); + bs << sz; + for (const auto& obcond : fOrderByCond) + { + bs << obcond.fIndex; + bs << obcond.fAsc; + bs << obcond.fNf; + } + sz = fDataVec.size(); + bs << sz; + for (const auto& rgdata : fDataVec) + { + rgdata->serialize(bs, fRowGroup.getDataSize(fRowsPerRG)); + } + bs << uint8_t(fDistinct); + if (fDistinct) + { + sz = fDistinctMap->size(); + bs << sz; + for (const auto& idx : views::values(*fDistinctMap)) + { + bs << idx; + } + } + sz = fOrderByQueue->size(); + bs << sz; + for (const auto& obq : *fOrderByQueue) + { + bs << obq.fIdx; + } +} + +void GroupConcatOrderBy::deserialize(messageqcpp::ByteStream& bs) +{ + GroupConcator::deserialize(bs); + fMemSize = 0; + uint64_t sz; + bs >> sz; + fOrderByCond.resize(sz); + uint8_t tmp8; + for (uint8_t i = 0; i < sz; ++i) + { + bs >> fOrderByCond[i].fIndex; + bs >> fOrderByCond[i].fAsc; + bs >> 
fOrderByCond[i].fNf; + } + + bs >> sz; + fDataVec.resize(sz); + if (sz > 0) + { + for (uint64_t i = 0; i < sz; ++i) + { + fDataVec[i].reset(new rowgroup::RGData(fRowGroup, fRowsPerRG)); + fDataVec[i]->deserialize(bs, fRowGroup.getDataSize(fRowsPerRG)); + fRowGroup.setData(fDataVec[i].get()); + auto rgsize = fRowGroup.getSizeWithStrings(fRowsPerRG); + fMemSize += rgsize; + } + + fRowGroup.initRow(&fRow0); + fRowGroup.getRow(fRowGroup.getRowCount() - 1, &fRow0); + } + else + { + createNewRGData(); + } + + fRule.fIdbCompare = this; + for (auto& compare : fRule.fCompares) + { + delete compare; + compare = nullptr; + } + fRule.fCompares.clear(); + fRule.compileRules(fOrderByCond, fRowGroup); + fRowGroup.initRow(&row1); + fRowGroup.initRow(&row2); + + bs >> tmp8; + fDistinct = tmp8; + if (fDistinct) + { + bs >> sz; + fDistinctMap.reset(new DistinctMap(sz, Hasher(this, getKeyLength()), Eq(this, getKeyLength()))); + for (uint64_t i = 0; i < sz; ++i) + { + uint64_t idx; + bs >> idx; + auto [gid, rid] = rowIdxToGidRid(idx, fRowsPerRG); + rowgroup::Row row; + fRowGroup.setData(fDataVec[gid].get()); + fRowGroup.initRow(&row); + fRowGroup.getRow(rid, &row); + fDistinctMap->emplace(row.getPointer(), idx); + } + } + + bs >> sz; + fOrderByQueue.reset(new SortingPQ(sz)); + for (uint64_t i = 0; i < sz; ++i) + { + uint64_t idx; + bs >> idx; + auto [gid, rid] = rowIdxToGidRid(idx, fRowsPerRG); + rowgroup::Row row; + fRowGroup.setData(fDataVec[gid].get()); + fRowGroup.initRow(&row); + fRowGroup.getRow(rid, &row); + fOrderByQueue->push(GroupConcatOrderByRow(row, idx, fRule)); + } + fRowGroup.setData(fDataVec.back().get()); + fRowGroup.getRow(fRowGroup.getRowCount() - 1, &fRow0); +} + +void GroupConcatOrderBy::createNewRGData() +{ + auto newSize = fRowGroup.getSizeWithStrings(fRowsPerRG); + + fMemSize += newSize; + + fDataVec.emplace_back(make_unique(fRowGroup, fRowsPerRG)); + fRowGroup.setData(fDataVec.back().get()); + fRowGroup.setUseOnlyLongString(true); + fRowGroup.resetRowGroup(0); + fRowGroup.initRow(&fRow0); + fRowGroup.getRow(0, &fRow0); +} + +rowgroup::RGDataSizeType GroupConcatOrderBy::getDataSize() const +{ + return fMemSize + + fOrderByQueue->capacity() * sizeof(GroupConcatOrderByRow) + + (fDistinct ? fDistinctMap->size() : 0) * 32 /* TODO: speculative unordered_map memory consumption per item, replace it with counting allocator */; +} + void GroupConcatOrderBy::processRow(const rowgroup::Row& row) { // check if this is a distinct row @@ -769,7 +1086,6 @@ void GroupConcatOrderBy::processRow(const rowgroup::Row& row) if (concatColIsNull(row)) return; - auto& orderByQueue = getQueue(); // if the row count is less than the limit if (fCurrentLength < fGroupConcatLen) { @@ -777,42 +1093,37 @@ void GroupConcatOrderBy::processRow(const rowgroup::Row& row) // the RID is no meaning here, use it to store the estimated length. int16_t estLen = lengthEstimate(fRow0); fRow0.setRid(estLen); - OrderByRow newRow(fRow0, fRule); - orderByQueue.push(newRow); + fRowGroup.incRowCount(); + + GroupConcatOrderByRow newRow(fRow0, getCurrentRowIdx(), fRule); + fOrderByQueue->push(newRow); fCurrentLength += estLen; // add to the distinct map if (fDistinct) - fDistinctMap->insert(fRow0.getPointer()); + fDistinctMap->emplace(fRow0.getPointer(), getCurrentRowIdx()); - fRowGroup.incRowCount(); fRow0.nextRow(); if (fRowGroup.getRowCount() >= fRowsPerRG) { - fDataQueue.push(fData); // A "postfix" but accurate RAM accounting that sums up sizes of RGDatas. 
uint64_t newSize = fRowGroup.getSizeWithStrings(); - if (!fRm->getMemory(newSize, fSessionMemLimit)) - { - cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @" << __FILE__ << ":" << __LINE__; - throw IDBExcept(fErrorCode); - } fMemSize += newSize; - fData.reinit(fRowGroup, fRowsPerRG); - fRowGroup.setData(&fData); + rowgroup::RGDataUnPtr rgdata(new rowgroup::RGData(fRowGroup, fRowsPerRG)); + fRowGroup.setData(rgdata.get()); fRowGroup.resetRowGroup(0); fRowGroup.getRow(0, &fRow0); + fDataVec.emplace_back(std::move(rgdata)); } } - - else if (fOrderByCond.size() > 0 && fRule.less(row.getPointer(), orderByQueue.top().fData)) + else if (fOrderByCond.size() > 0 && fRule.less(row.getPointer(), fOrderByQueue->top().fData)) { - OrderByRow swapRow = orderByQueue.top(); + GroupConcatOrderByRow swapRow = fOrderByQueue->top(); fRow1.setData(swapRow.fData); - orderByQueue.pop(); + fOrderByQueue->pop(); fCurrentLength -= fRow1.getRelRid(); fRow2.setData(swapRow.fData); @@ -825,66 +1136,73 @@ void GroupConcatOrderBy::processRow(const rowgroup::Row& row) // only the copyRow does useful work here fDistinctMap->erase(swapRow.fData); copyRow(row, &fRow2); - fDistinctMap->insert(swapRow.fData); + fDistinctMap->emplace(swapRow.fData, swapRow.fIdx); } int16_t estLen = lengthEstimate(fRow2); fRow2.setRid(estLen); fCurrentLength += estLen; - orderByQueue.push(swapRow); + fOrderByQueue->push(swapRow); } } void GroupConcatOrderBy::merge(GroupConcator* gc) { GroupConcatOrderBy* go = dynamic_cast(gc); - - auto& orderByQueue = getQueue(); - auto& mergeQueue = go->getQueue(); - - while (mergeQueue.empty() == false) + fMemSize += go->fMemSize; + go->fMemSize = 0; + uint32_t shift = fDataVec.size(); + auto& rgdatas = go->getRGDatas(); + for (auto& rgdata : rgdatas) { - const OrderByRow& row = mergeQueue.top(); + fDataVec.emplace_back(std::move(rgdata)); + } + rgdatas.clear(); + + auto* orderByQueue = go->getQueue(); + while (!orderByQueue->empty()) + { + GroupConcatOrderByRow row = orderByQueue->top(); + row.fIdx = shiftGroupIdxBy(row.fIdx, shift); + row.fRule = &fRule; // check if the distinct row already exists if (fDistinct && fDistinctMap->find(row.fData) != fDistinctMap->end()) { ; // no op; } - // if the row count is less than the limit else if (fCurrentLength < fGroupConcatLen) { - orderByQueue.push(row); + fOrderByQueue->push(row); row1.setData(row.fData); fCurrentLength += row1.getRelRid(); // add to the distinct map if (fDistinct) - fDistinctMap->insert(row.fData); + fDistinctMap->emplace(row.fData, row.fIdx); } - - else if (fOrderByCond.size() > 0 && fRule.less(row.fData, orderByQueue.top().fData)) + else if (fOrderByCond.size() > 0 && fRule.less(row.fData, fOrderByQueue->top().fData)) { - OrderByRow swapRow = orderByQueue.top(); + GroupConcatOrderByRow swapRow = fOrderByQueue->top(); row1.setData(swapRow.fData); - orderByQueue.pop(); + fOrderByQueue->pop(); fCurrentLength -= row1.getRelRid(); if (fDistinct) { fDistinctMap->erase(swapRow.fData); - fDistinctMap->insert(row.fData); + fDistinctMap->emplace(row.fData, row.fIdx); } row1.setData(row.fData); fCurrentLength += row1.getRelRid(); - orderByQueue.push(row); + fOrderByQueue->push(row); } - mergeQueue.pop(); + orderByQueue->pop(); } } @@ -894,42 +1212,43 @@ uint8_t* GroupConcatOrderBy::getResultImpl(const string& sep) bool addSep = false; // need to reverse the order - stack rowStack; - auto& orderByQueue = getQueue(); - - while (orderByQueue.size() > 0) + stack rowStack; + while (fOrderByQueue->size() > 0) { - rowStack.push(orderByQueue.top()); - 
orderByQueue.pop(); + rowStack.push(fOrderByQueue->top()); + fOrderByQueue->pop(); } size_t prevResultSize = 0; size_t rowsProcessed = 0; bool isNull = true; - while (rowStack.size() > 0) + if (rowStack.size() > 0) { - if (addSep) - oss << sep; - else - addSep = true; - - const OrderByRow& topRow = rowStack.top(); - fRow0.setData(topRow.fData); - outputRow(oss, fRow0); - isNull = false; - rowStack.pop(); - if (rowsProcessed >= fRowsPerRG) + if (fIsJsonArrayAgg) + oss << "["; + while (rowStack.size() > 0) { - size_t sizeDiff = oss.str().size() - prevResultSize; - prevResultSize = oss.str().size(); - if (!fRm->getMemory(sizeDiff, fSessionMemLimit)) + if (addSep) + oss << sep; + else + addSep = true; + + const GroupConcatOrderByRow& topRow = rowStack.top(); + fRow0.setData(topRow.fData); + outputRow(oss, fRow0); + isNull = false; + rowStack.pop(); + ++rowsProcessed; + if (rowsProcessed >= fRowsPerRG) { - cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @" << __FILE__ << ":" << __LINE__; - throw IDBExcept(fErrorCode); + size_t sizeDiff = oss.str().size() - prevResultSize; + prevResultSize = oss.str().size(); + fMemSize += sizeDiff; + rowsProcessed = 0; } - fMemSize += sizeDiff; - rowsProcessed = 0; } + if (fIsJsonArrayAgg) + oss << "]"; } return swapStreamWithStringAndReturnBuf(oss, isNull); @@ -937,10 +1256,12 @@ uint8_t* GroupConcatOrderBy::getResultImpl(const string& sep) uint8_t* GroupConcator::swapStreamWithStringAndReturnBuf(ostringstream& oss, bool isNull) { - if (isNull) { + if (isNull) + { outputBuf_.reset(); return nullptr; } + // XXX: what is all this black magic for? int64_t resultSize = oss.str().size(); oss << '\0' << '\0'; outputBuf_.reset(new std::string(std::move(*oss.rdbuf()).str())); @@ -954,6 +1275,7 @@ uint8_t* GroupConcator::swapStreamWithStringAndReturnBuf(ostringstream& oss, boo (*outputBuf_)[fGroupConcatLen + 1] = '\0'; } + // FIXME: a string_view can be returned here to get rid of strlen() later return reinterpret_cast(outputBuf_->data()); } @@ -982,16 +1304,37 @@ const string GroupConcatOrderBy::toString() const return (baseStr + oss.str()); } -// GroupConcatNoOrder class implementation -GroupConcatNoOrder::GroupConcatNoOrder() - : fRowsPerRG(128), fErrorCode(ERR_AGGREGATION_TOO_BIG), fMemSize(0), fRm(NULL) +uint64_t GroupConcatOrderBy::Hasher::operator()(const rowgroup::Row::Pointer& p) const { + Row& row = ts->row1; + row.setPointer(p); + return row.hash(colCount - 1); } +bool GroupConcatOrderBy::Eq::operator()(const rowgroup::Row::Pointer& p1, + const rowgroup::Row::Pointer& p2) const +{ + Row& r1 = ts->row1; + Row& r2 = ts->row2; + r1.setPointer(p1); + r2.setPointer(p2); + return r1.equals(r2, colCount - 1); +} + +uint64_t GroupConcatOrderBy::getCurrentRowIdx() const +{ + return rowGidRidToIdx(fDataVec.size() - 1, fRowGroup.getRowCount() - 1, fRowsPerRG); +} + +uint64_t GroupConcatOrderBy::shiftGroupIdxBy(uint64_t idx, uint32_t shift) +{ + auto [gid, rid] = rowIdxToGidRid(idx, fRowsPerRG); + return rowGidRidToIdx(gid + shift, rid, fRowsPerRG); +} + +// GroupConcatNoOrder class implementation GroupConcatNoOrder::~GroupConcatNoOrder() { - if (fRm) - fRm->returnMemory(fMemSize, fSessionMemLimit); } void GroupConcatNoOrder::initialize(const rowgroup::SP_GroupConcat& gcc) @@ -1001,31 +1344,13 @@ void GroupConcatNoOrder::initialize(const rowgroup::SP_GroupConcat& gcc) fRowGroup = gcc->fRowGroup; fRowGroup.setUseOnlyLongString(true); fRowsPerRG = 128; - fErrorCode = ERR_AGGREGATION_TOO_BIG; - fRm = gcc->fRm; - fSessionMemLimit = gcc->fSessionMemLimit; - 
vector >::iterator i = gcc->fGroupCols.begin(); - - while (i != gcc->fGroupCols.end()) - fConcatColumns.push_back((*(i++)).second); - - uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize(); - - if (fRm && !fRm->getMemory(newSize, fSessionMemLimit)) + for (uint32_t colIdx : views::values(gcc->fGroupCols)) { - cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @" << __FILE__ << ":" << __LINE__; - throw IDBExcept(fErrorCode); + fConcatColumns.push_back(colIdx); } - fMemSize += newSize; - - fData.reinit(fRowGroup, fRowsPerRG); - fRowGroup.setData(&fData); - fRowGroup.setUseOnlyLongString(true); - fRowGroup.resetRowGroup(0); - fRowGroup.initRow(&fRow); - fRowGroup.getRow(0, &fRow); + createNewRGData(); } void GroupConcatNoOrder::processRow(const rowgroup::Row& row) @@ -1041,41 +1366,37 @@ void GroupConcatNoOrder::processRow(const rowgroup::Row& row) fCurrentLength += estLen; fRowGroup.incRowCount(); fRow.nextRow(); + auto newSize = fRowGroup.getSizeWithStrings(fRowsPerRG); + if (newSize > fCurMemSize) + { + auto diff = newSize - fCurMemSize; + fCurMemSize = newSize; + fMemSize += diff; + } if (fRowGroup.getRowCount() >= fRowsPerRG) { - // A "postfix" but accurate RAM accounting that sums up sizes of RGDatas. - uint64_t newSize = fRowGroup.getSizeWithStrings(); - - if (!fRm->getMemory(newSize, fSessionMemLimit)) - { - cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @" << __FILE__ << ":" << __LINE__; - throw IDBExcept(fErrorCode); - } - fMemSize += newSize; - - fDataQueue.push(fData); - fData.reinit(fRowGroup, fRowsPerRG); - fRowGroup.setData(&fData); - fRowGroup.resetRowGroup(0); - fRowGroup.getRow(0, &fRow); + createNewRGData(); } } } void GroupConcatNoOrder::merge(GroupConcator* gc) { - GroupConcatNoOrder* in = dynamic_cast(gc); + auto* in = dynamic_cast(gc); + assert(in != nullptr); - while (in->fDataQueue.size() > 0) + for (auto& i : in->getRGDatas()) { - fDataQueue.push(in->fDataQueue.front()); - in->fDataQueue.pop(); + fDataVec.emplace_back(std::move(i)); } - - fDataQueue.push(in->fData); + fRowGroup.setData(fDataVec.back().get()); + fRowGroup.setUseOnlyLongString(true); + fRowGroup.initRow(&fRow); + fRowGroup.getRow(fRowGroup.getRowCount(), &fRow); fMemSize += in->fMemSize; - in->fMemSize = 0; + fCurMemSize = in->fCurMemSize; + in->fMemSize = in->fCurMemSize = 0; } uint8_t* GroupConcatNoOrder::getResultImpl(const string& sep) @@ -1083,17 +1404,23 @@ uint8_t* GroupConcatNoOrder::getResultImpl(const string& sep) ostringstream oss; bool addSep = false; - fDataQueue.push(fData); size_t prevResultSize = 0; bool isNull = true; - while (fDataQueue.size() > 0) + bool addBrackets = true; + for (auto& rgdata : fDataVec) { - fRowGroup.setData(&fDataQueue.front()); + fRowGroup.setData(rgdata.get()); + fRowGroup.initRow(&fRow); fRowGroup.getRow(0, &fRow); for (uint64_t i = 0; i < fRowGroup.getRowCount(); i++) { + if (addBrackets && fIsJsonArrayAgg) + { + oss << "["; + addBrackets = false; + } if (addSep) oss << sep; else @@ -1105,21 +1432,70 @@ uint8_t* GroupConcatNoOrder::getResultImpl(const string& sep) } size_t sizeDiff = oss.str().size() - prevResultSize; prevResultSize = oss.str().size(); - if (!fRm->getMemory(sizeDiff, fSessionMemLimit)) - { - cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @" << __FILE__ << ":" << __LINE__; - throw IDBExcept(fErrorCode); - } fMemSize += sizeDiff; - fDataQueue.pop(); + rgdata.reset(); } + if (fIsJsonArrayAgg && !addBrackets) + oss << "]"; return swapStreamWithStringAndReturnBuf(oss, isNull); } +void 
GroupConcatNoOrder::serialize(messageqcpp::ByteStream& bs) const +{ + GroupConcator::serialize(bs); + RGDataSizeType sz = fDataVec.size(); + bs << sz; + for (auto& rgdata : fDataVec) + { + if (rgdata) + { + rgdata->serialize(bs, fRowGroup.getDataSize()); + } + } +} + +void GroupConcatNoOrder::deserialize(messageqcpp::ByteStream& bs) +{ + GroupConcator::deserialize(bs); + RGDataSizeType sz; + bs >> sz; + fMemSize = fCurMemSize = 0; + fDataVec.resize(sz); + if (sz == 0) + { + createNewRGData(); + } + else + { + for (RGDataSizeType i = 0; i < sz; i++) + { + fDataVec[i].reset(new RGData(fRowGroup, fRowsPerRG)); + fDataVec[i]->deserialize(bs, fRowGroup.getDataSize(fRowsPerRG)); + fRowGroup.setData(fDataVec[i].get()); + fCurMemSize = fRowGroup.getSizeWithStrings(fRowsPerRG); + fMemSize += fCurMemSize; + } + } +} + const string GroupConcatNoOrder::toString() const { return GroupConcator::toString(); } +void GroupConcatNoOrder::createNewRGData() +{ + auto newSize = fRowGroup.getDataSize(fRowsPerRG); + + fMemSize += newSize; + fCurMemSize = newSize; + + fDataVec.emplace_back(make_unique(fRowGroup, fRowsPerRG)); + fRowGroup.setData(fDataVec.back().get()); + fRowGroup.setUseOnlyLongString(true); + fRowGroup.resetRowGroup(0); + fRowGroup.initRow(&fRow); + fRowGroup.getRow(0, &fRow); +} } // namespace joblist diff --git a/dbcon/joblist/groupconcat.h b/dbcon/joblist/groupconcat.h index 705b691ef..a036705be 100644 --- a/dbcon/joblist/groupconcat.h +++ b/dbcon/joblist/groupconcat.h @@ -26,12 +26,11 @@ #include #include -#include "returnedcolumn.h" // SRCP -#include "rowgroup.h" // RowGroup -#include "rowaggregation.h" // SP_GroupConcat -#include "limitedorderby.h" // IdbOrderBy - -#define EXPORT +#include "groupconcatcolumn.h" // GroupConcatColumn +#include "returnedcolumn.h" // SRCP +#include "rowgroup.h" // RowGroup +#include "rowaggregation.h" // SP_GroupConcat +#include "limitedorderby.h" // IdbOrderBy namespace joblist { @@ -44,10 +43,10 @@ class GroupConcatInfo { public: GroupConcatInfo(); - virtual ~GroupConcatInfo(); + ~GroupConcatInfo(); void prepGroupConcat(JobInfo&); - virtual void mapColumns(const rowgroup::RowGroup&); + void mapColumns(const rowgroup::RowGroup&); std::set& columns() { @@ -58,50 +57,66 @@ class GroupConcatInfo return fGroupConcat; } - virtual const std::string toString() const; + const std::string toString() const; protected: - virtual uint32_t getColumnKey(const execplan::SRCP& srcp, JobInfo& jobInfo); - virtual std::shared_ptr makeMapping(const rowgroup::RowGroup&, const rowgroup::RowGroup&); + uint32_t getColumnKey(const execplan::SRCP& srcp, JobInfo& jobInfo) const; + std::shared_ptr makeMapping(const rowgroup::RowGroup&, const rowgroup::RowGroup&) const; std::set fColumns; std::vector fGroupConcat; }; -class GroupConcatAgUM : public rowgroup::GroupConcatAg +class GroupConcatAg { public: - EXPORT explicit GroupConcatAgUM(rowgroup::SP_GroupConcat&); - EXPORT ~GroupConcatAgUM() override; + explicit GroupConcatAg(rowgroup::SP_GroupConcat&, bool isJsonArrayAgg = false); + ~GroupConcatAg(); - using rowgroup::GroupConcatAg::merge; - void initialize() override; - void processRow(const rowgroup::Row&) override; - EXPORT virtual void merge(const rowgroup::Row&, int64_t); + void initialize(); + void processRow(const rowgroup::Row&); + void merge(const rowgroup::Row&, uint64_t); boost::scoped_ptr& concator() { return fConcator; } - EXPORT uint8_t* getResult() override; + uint8_t* getResult(); + + uint32_t getGroupConcatId() const + { + return fGroupConcat->id; + } + + void 
serialize(messageqcpp::ByteStream& bs) const; + void deserialize(messageqcpp::ByteStream& bs); + + rowgroup::RGDataSizeType getDataSize() const; protected: - virtual void applyMapping(const std::shared_ptr&, const rowgroup::Row&); + void applyMapping(const std::shared_ptr&, const rowgroup::Row&); + rowgroup::SP_GroupConcat fGroupConcat; + bool fIsJsonArrayAgg{false}; boost::scoped_ptr fConcator; boost::scoped_array fData; rowgroup::Row fRow; rowgroup::RGData fRowRGData; rowgroup::RowGroup fRowGroup; bool fNoOrder; + rowgroup::RGDataSizeType fMemSize{0}; }; +using SP_GroupConcatAg = boost::shared_ptr; + // GROUP_CONCAT base class GroupConcator { public: - GroupConcator(); - virtual ~GroupConcator(); + explicit GroupConcator(bool isJsonArrayAgg) : fIsJsonArrayAgg(isJsonArrayAgg) + { + } + virtual ~GroupConcator() = default; virtual void initialize(const rowgroup::SP_GroupConcat&); virtual void processRow(const rowgroup::Row&) = 0; @@ -113,6 +128,10 @@ class GroupConcator virtual const std::string toString() const; + virtual void serialize(messageqcpp::ByteStream&) const; + virtual void deserialize(messageqcpp::ByteStream&); + virtual rowgroup::RGDataSizeType getDataSize() const = 0; + protected: virtual bool concatColIsNull(const rowgroup::Row&); virtual void outputRow(std::ostringstream&, const rowgroup::Row&); @@ -120,18 +139,24 @@ class GroupConcator std::vector fConcatColumns; std::vector > fConstCols; - int64_t fCurrentLength; - int64_t fGroupConcatLen; - int64_t fConstantLen; + int64_t fCurrentLength{0}; + int64_t fGroupConcatLen{0}; + int64_t fConstantLen{0}; std::unique_ptr outputBuf_; - long fTimeZone; + long fTimeZone{0}; + bool fIsJsonArrayAgg{false}; + + joblist::ResourceManager* fRm{nullptr}; + boost::shared_ptr fSessionMemLimit; }; // For GROUP_CONCAT withour distinct or orderby class GroupConcatNoOrder : public GroupConcator { public: - GroupConcatNoOrder(); + explicit GroupConcatNoOrder(bool isJsonArrayAgg) : GroupConcator(isJsonArrayAgg) + { + } ~GroupConcatNoOrder() override; void initialize(const rowgroup::SP_GroupConcat&) override; @@ -142,32 +167,45 @@ class GroupConcatNoOrder : public GroupConcator uint8_t* getResultImpl(const std::string& sep) override; // uint8_t* getResult(const std::string& sep); + void serialize(messageqcpp::ByteStream&) const override; + void deserialize(messageqcpp::ByteStream&) override; + + rowgroup::RGDataSizeType getDataSize() const override + { + return fMemSize; + } + const std::string toString() const override; protected: + std::vector& getRGDatas() { return fDataVec; } + + void createNewRGData(); rowgroup::RowGroup fRowGroup; rowgroup::Row fRow; - rowgroup::RGData fData; - std::queue fDataQueue; - uint64_t fRowsPerRG; - uint64_t fErrorCode; - uint64_t fMemSize; - ResourceManager* fRm; - boost::shared_ptr fSessionMemLimit; + std::vector fDataVec; + uint64_t fRowsPerRG{128}; + rowgroup::RGDataSizeType fMemSize{0}; + rowgroup::RGDataSizeType fCurMemSize{0}; }; // ORDER BY used in GROUP_CONCAT class // This version is for GROUP_CONCAT, the size is limited by the group_concat_max_len. 
-class GroupConcatOrderBy : public GroupConcator, public ordering::IdbOrderBy +class GroupConcatOrderBy : public GroupConcator, public ordering::IdbCompare { public: - GroupConcatOrderBy(); + explicit GroupConcatOrderBy(bool isJsonArrayAgg); ~GroupConcatOrderBy() override; - using ordering::IdbOrderBy::initialize; + using ordering::IdbCompare::initialize; void initialize(const rowgroup::SP_GroupConcat&) override; void processRow(const rowgroup::Row&) override; - uint64_t getKeyLength() const override; + uint64_t getKeyLength() const; + + void serialize(messageqcpp::ByteStream&) const override; + void deserialize(messageqcpp::ByteStream&) override; + + rowgroup::RGDataSizeType getDataSize() const override; void merge(GroupConcator*) override; using GroupConcator::getResult; @@ -177,8 +215,52 @@ class GroupConcatOrderBy : public GroupConcator, public ordering::IdbOrderBy const std::string toString() const override; protected: + struct Hasher + { + GroupConcatOrderBy* ts; + utils::Hasher_r h; + uint32_t colCount; + + Hasher(GroupConcatOrderBy* t, uint32_t c) : ts(t), colCount(c) + { + } + uint64_t operator()(const rowgroup::Row::Pointer&) const; + }; + + struct Eq + { + GroupConcatOrderBy* ts; + uint32_t colCount; + + Eq(GroupConcatOrderBy* t, uint32_t c) : ts(t), colCount(c) + { + } + + bool operator()(const rowgroup::Row::Pointer&, const rowgroup::Row::Pointer&) const; + }; + + using DistinctMap = std::unordered_map; + + class SortingPQ; + + protected: + void createNewRGData(); + uint64_t getCurrentRowIdx() const; + static uint64_t shiftGroupIdxBy(uint64_t idx, uint32_t shift); + std::vector& getRGDatas() { return fDataVec; } + SortingPQ* getQueue() { return fOrderByQueue.get(); } + + rowgroup::RGDataSizeType fMemSize{0}; + static constexpr uint64_t fRowsPerRG{128}; + + std::vector fOrderByCond; + rowgroup::Row fRow0; + rowgroup::Row row1, row2; + ordering::CompareRule fRule; + std::vector fDataVec; + bool fDistinct; + std::unique_ptr fDistinctMap; + std::unique_ptr fOrderByQueue; }; } // namespace joblist - -#undef EXPORT diff --git a/dbcon/joblist/jlf_common.h b/dbcon/joblist/jlf_common.h index befba9a94..c6a8bb9eb 100644 --- a/dbcon/joblist/jlf_common.h +++ b/dbcon/joblist/jlf_common.h @@ -43,7 +43,6 @@ #include "joblist.h" #include "jobstep.h" #include "groupconcat.h" -#include "jsonarrayagg.h" #include "jl_logger.h" #include "resourcemanager.h" diff --git a/dbcon/joblist/jsonarrayagg.cpp b/dbcon/joblist/jsonarrayagg.cpp deleted file mode 100644 index 16f1be803..000000000 --- a/dbcon/joblist/jsonarrayagg.cpp +++ /dev/null @@ -1,1068 +0,0 @@ -/* Copyright (C) 2022 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. 
*/ - -#include -// #define NDEBUG -#include -#include -using namespace std; - - -#include "errorids.h" -#include "exceptclasses.h" -using namespace logging; - -#include "returnedcolumn.h" -#include "aggregatecolumn.h" -#include "arithmeticcolumn.h" -#include "functioncolumn.h" -#include "constantcolumn.h" -#include "rowcolumn.h" -#include "groupconcatcolumn.h" -#include "jsonarrayaggcolumn.h" -#include "calpontsystemcatalog.h" -using namespace execplan; - -#include "rowgroup.h" -#include "rowaggregation.h" -using namespace rowgroup; - -#include "dataconvert.h" -using namespace dataconvert; - -#include "jsonarrayagg.h" - -using namespace ordering; - -#include "jobstep.h" -#include "jlf_common.h" -#include "limitedorderby.h" -#include "mcs_decimal.h" - -#include "utils/json/json.hpp" -using namespace nlohmann; - -namespace joblist -{ - -void JsonArrayInfo::prepJsonArray(JobInfo& jobInfo) -{ - RetColsVector::iterator i = jobInfo.groupConcatCols.begin(); - - while (i != jobInfo.groupConcatCols.end()) - { - JsonArrayAggColumn* gcc = dynamic_cast(i->get()); - const RowColumn* rcp = dynamic_cast(gcc->aggParms()[0].get()); - - SP_GroupConcat groupConcat(new GroupConcat); - groupConcat->fSeparator = gcc->separator(); // or ,? - groupConcat->fDistinct = gcc->distinct(); - groupConcat->fSize = gcc->resultType().colWidth; - groupConcat->fRm = jobInfo.rm; - groupConcat->fSessionMemLimit = jobInfo.umMemLimit; - groupConcat->fTimeZone = jobInfo.timeZone; - - int key = -1; - const vector& cols = rcp->columnVec(); - - for (uint64_t j = 0, k = 0; j < cols.size(); j++) - { - const ConstantColumn* cc = dynamic_cast(cols[j].get()); - - if (cc == NULL) - { - key = getColumnKey(cols[j], jobInfo); - fColumns.insert(key); - groupConcat->fGroupCols.push_back(make_pair(key, k++)); - } - else - { - groupConcat->fConstCols.push_back(make_pair(cc->constval(), j)); - } - } - - vector& orderCols = gcc->orderCols(); - - for (vector::iterator k = orderCols.begin(); k != orderCols.end(); k++) - { - if (dynamic_cast(k->get()) != NULL) - continue; - - key = getColumnKey(*k, jobInfo); - fColumns.insert(key); - groupConcat->fOrderCols.push_back(make_pair(key, k->get()->asc())); - } - - fGroupConcat.push_back(groupConcat); - - i++; - } - - // Rare case: all columns in group_concat are constant columns, use a column in column map. - if (jobInfo.groupConcatCols.size() > 0 && fColumns.size() == 0) - { - int key = -1; - - for (vector::iterator i = jobInfo.tableList.begin(); i != jobInfo.tableList.end() && key == -1; - i++) - { - if (jobInfo.columnMap[*i].size() > 0) - { - key = *(jobInfo.columnMap[*i].begin()); - } - } - - if (key != -1) - { - fColumns.insert(key); - } - else - { - throw runtime_error("Empty column map."); - } - } -} - -uint32_t JsonArrayInfo::getColumnKey(const SRCP& srcp, JobInfo& jobInfo) -{ - int colKey = -1; - const SimpleColumn* sc = dynamic_cast(srcp.get()); - - if (sc != NULL) - { - if (sc->schemaName().empty()) - { - // bug3839, handle columns from subquery. 
- SimpleColumn tmp(*sc, jobInfo.sessionId); - tmp.oid(tableOid(sc, jobInfo.csc) + 1 + sc->colPosition()); - colKey = getTupleKey(jobInfo, &tmp); - } - else - { - colKey = getTupleKey(jobInfo, sc); - } - - // check if this is a dictionary column - if (jobInfo.keyInfo->dictKeyMap.find(colKey) != jobInfo.keyInfo->dictKeyMap.end()) - colKey = jobInfo.keyInfo->dictKeyMap[colKey]; - } - else - { - const ArithmeticColumn* ac = dynamic_cast(srcp.get()); - const FunctionColumn* fc = dynamic_cast(srcp.get()); - - if (ac != NULL || fc != NULL) - { - colKey = getExpTupleKey(jobInfo, srcp->expressionId()); - } - else - { - cerr << "Unsupported JSON_ARRAYAGG column. " << srcp->toString() << endl; - throw runtime_error("Unsupported JSON_ARRAYAGG column."); - } - } - - return colKey; -} - -void JsonArrayInfo::mapColumns(const RowGroup& projRG) -{ - map projColumnMap; - const vector& keysProj = projRG.getKeys(); - - for (uint64_t i = 0; i < projRG.getColumnCount(); i++) - projColumnMap[keysProj[i]] = i; - - for (vector::iterator k = fGroupConcat.begin(); k != fGroupConcat.end(); k++) - { - vector pos; - vector oids; - vector keys; - vector scale; - vector precision; - vector types; - vector csNums; - pos.push_back(2); - - vector >::iterator i1 = (*k)->fGroupCols.begin(); - - while (i1 != (*k)->fGroupCols.end()) - { - map::iterator j = projColumnMap.find(i1->first); - - if (j == projColumnMap.end()) - { - cerr << "Arrayagg Key:" << i1->first << " is not projected." << endl; - throw runtime_error("Project error."); - } - - pos.push_back(pos.back() + projRG.getColumnWidth(j->second)); - oids.push_back(projRG.getOIDs()[j->second]); - keys.push_back(projRG.getKeys()[j->second]); - types.push_back(projRG.getColTypes()[j->second]); - csNums.push_back(projRG.getCharsetNumber(j->second)); - scale.push_back(projRG.getScale()[j->second]); - precision.push_back(projRG.getPrecision()[j->second]); - - i1++; - } - - vector >::iterator i2 = (*k)->fOrderCols.begin(); - - while (i2 != (*k)->fOrderCols.end()) - { - map::iterator j = projColumnMap.find(i2->first); - - if (j == projColumnMap.end()) - { - cerr << "Order Key:" << i2->first << " is not projected." << endl; - throw runtime_error("Project error."); - } - - vector::iterator i3 = find(keys.begin(), keys.end(), j->first); - int idx = 0; - - if (i3 == keys.end()) - { - idx = keys.size(); - - pos.push_back(pos.back() + projRG.getColumnWidth(j->second)); - oids.push_back(projRG.getOIDs()[j->second]); - keys.push_back(projRG.getKeys()[j->second]); - types.push_back(projRG.getColTypes()[j->second]); - csNums.push_back(projRG.getCharsetNumber(j->second)); - scale.push_back(projRG.getScale()[j->second]); - precision.push_back(projRG.getPrecision()[j->second]); - } - else - { - idx = std::distance(keys.begin(), i3); - } - - (*k)->fOrderCond.push_back(make_pair(idx, i2->second)); - - i2++; - } - - (*k)->fRowGroup = RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, - projRG.getStringTableThreshold(), false); - - // MCOL-5491/MCOL-5429 Use stringstore if the datatype of the - // json_arrayagg/group_concat field is a long string. 
- if ((*k)->fRowGroup.hasLongString()) - { - (*k)->fRowGroup.setUseStringTable(true); - } - - (*k)->fMapping = makeMapping(projRG, (*k)->fRowGroup); - } -} - -std::shared_ptr JsonArrayInfo::makeMapping(const RowGroup& in, const RowGroup& out) -{ - // For some reason using the rowgroup mapping fcns don't work completely right in this class - std::shared_ptr mapping(new int[out.getColumnCount()]); - - for (uint64_t i = 0; i < out.getColumnCount(); i++) - { - for (uint64_t j = 0; j < in.getColumnCount(); j++) - { - if ((out.getKeys()[i] == in.getKeys()[j])) - { - mapping[i] = j; - break; - } - } - } - - return mapping; -} - -const string JsonArrayInfo::toString() const -{ - ostringstream oss; - oss << "JsonArrayInfo: toString() to be implemented."; - oss << endl; - - return oss.str(); -} - -JsonArrayAggregatAgUM::JsonArrayAggregatAgUM(rowgroup::SP_GroupConcat& gcc) : GroupConcatAgUM(gcc) -{ - initialize(); -} - -JsonArrayAggregatAgUM::~JsonArrayAggregatAgUM() -{ -} - -void JsonArrayAggregatAgUM::initialize() -{ - if (fGroupConcat->fDistinct || fGroupConcat->fOrderCols.size() > 0) - fConcator.reset(new JsonArrayAggOrderBy()); - else - fConcator.reset(new JsonArrayAggNoOrder()); - - fConcator->initialize(fGroupConcat); - - // MCOL-5491/MCOL-5429 Use stringstore if the datatype of the - // json_arrayagg/group_concat field is a long string. - if (fGroupConcat->fRowGroup.hasLongString()) - { - fRowGroup = fGroupConcat->fRowGroup; - fRowGroup.setUseStringTable(true); - fRowRGData.reinit(fRowGroup, 1); - fRowGroup.setData(&fRowRGData); - fRowGroup.resetRowGroup(0); - fRowGroup.initRow(&fRow); - fRowGroup.getRow(0, &fRow); - } - else - { - fGroupConcat->fRowGroup.initRow(&fRow, true); - fData.reset(new uint8_t[fRow.getSize()]); - fRow.setData(rowgroup::Row::Pointer(fData.get())); - } -} - -void JsonArrayAggregatAgUM::processRow(const rowgroup::Row& inRow) -{ - applyMapping(fGroupConcat->fMapping, inRow); - fConcator->processRow(fRow); -} - -void JsonArrayAggregatAgUM::merge(const rowgroup::Row& inRow, int64_t i) -{ - uint8_t* data = inRow.getData(); - joblist::JsonArrayAggregatAgUM* gccAg = *((joblist::JsonArrayAggregatAgUM**)(data + inRow.getOffset(i))); - - fConcator->merge(gccAg->concator().get()); -} - -uint8_t* JsonArrayAggregatAgUM::getResult() -{ - return fConcator->getResult(fGroupConcat->fSeparator); -} - -void JsonArrayAggregatAgUM::applyMapping(const std::shared_ptr& mapping, const Row& row) -{ - // For some reason the rowgroup mapping fcns don't work right in this class. 
- for (uint64_t i = 0; i < fRow.getColumnCount(); i++) - { - if (fRow.getColumnWidth(i) > datatypes::MAXLEGACYWIDTH) - { - if (fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::CHAR || - fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::VARCHAR || - fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT) - { - fRow.setStringField(row.getConstString(mapping[i]), i); - } - else if (fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::LONGDOUBLE) - { - fRow.setLongDoubleField(row.getLongDoubleField(mapping[i]), i); - } - else if (datatypes::isWideDecimalType(fRow.getColType(i), fRow.getColumnWidth(i))) - { - row.copyBinaryField(fRow, i, mapping[i]); - } - } - else - { - if (fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::CHAR || - fRow.getColTypes()[i] == execplan::CalpontSystemCatalog::VARCHAR) - { - fRow.setIntField(row.getUintField(mapping[i]), i); - } - else - { - fRow.setIntField(row.getIntField(mapping[i]), i); - } - } - } -} - -JsonArrayAggregator::JsonArrayAggregator() : GroupConcator() -{ -} - -JsonArrayAggregator::~JsonArrayAggregator() -{ -} - -void JsonArrayAggregator::initialize(const rowgroup::SP_GroupConcat& gcc) -{ - fGroupConcatLen = gcc->fSize; - fCurrentLength -= strlen(gcc->fSeparator.c_str()); - fTimeZone = gcc->fTimeZone; - - fConstCols = gcc->fConstCols; - fConstantLen = strlen(gcc->fSeparator.c_str()); - - for (uint64_t i = 0; i < fConstCols.size(); i++) - fConstantLen += fConstCols[i].first.length(); -} - -void JsonArrayAggregator::outputRow(std::ostringstream& oss, const rowgroup::Row& row) -{ - const CalpontSystemCatalog::ColDataType* types = row.getColTypes(); - vector::iterator i = fConcatColumns.begin(); - vector >::iterator j = fConstCols.begin(); - - uint64_t groupColCount = fConcatColumns.size() + fConstCols.size(); - - for (uint64_t k = 0; k < groupColCount; k++) - { - if (j != fConstCols.end() && k == j->second) - { - oss << j->first.safeString(""); // XXX: NULLs??? - j++; - continue; - } - - switch (types[*i]) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - { - int64_t intVal = row.getIntField(*i); - - oss << intVal; - - break; - } - - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - { - oss << fixed << row.getDecimalField(*i); - break; - } - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - { - uint64_t uintVal = row.getUintField(*i); - int scale = (int)row.getScale(*i); - - if (scale == 0) - { - oss << uintVal; - } - else - { - oss << fixed - << datatypes::Decimal(datatypes::TSInt128((int128_t)uintVal), scale, - datatypes::INT128MAXPRECISION); - } - - break; - } - - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::TEXT: - { - std::string maybeJson = row.getStringField(*i).safeString(""); // XXX: NULL??? it is not checked anywhere. 
- [[maybe_unused]] const auto j = json::parse(maybeJson, nullptr, false); - if (j.is_discarded()) - { - oss << std::quoted(maybeJson.c_str()); - } - else - { - oss << maybeJson.c_str(); - } - break; - } - - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - { - oss << setprecision(15) << row.getDoubleField(*i); - break; - } - - case CalpontSystemCatalog::LONGDOUBLE: - { - oss << setprecision(15) << row.getLongDoubleField(*i); - break; - } - - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - { - oss << row.getFloatField(*i); - break; - } - - case CalpontSystemCatalog::DATE: - { - oss << std::quoted(DataConvert::dateToString(row.getUintField(*i))); - break; - } - - case CalpontSystemCatalog::DATETIME: - { - oss << std::quoted(DataConvert::datetimeToString(row.getUintField(*i))); - break; - } - - case CalpontSystemCatalog::TIMESTAMP: - { - oss << std::quoted(DataConvert::timestampToString(row.getUintField(*i), fTimeZone)); - break; - } - - case CalpontSystemCatalog::TIME: - { - oss << std::quoted(DataConvert::timeToString(row.getUintField(*i))); - break; - } - - default: - { - break; - } - } - - i++; - } -} - -bool JsonArrayAggregator::concatColIsNull(const rowgroup::Row& row) -{ - bool ret = false; - - for (vector::iterator i = fConcatColumns.begin(); i != fConcatColumns.end(); i++) - { - if (row.isNullValue(*i)) - { - ret = true; - break; - } - } - - return ret; -} - -int64_t JsonArrayAggregator::lengthEstimate(const rowgroup::Row& row) -{ - int64_t rowLen = fConstantLen; // fixed constant and separator length - const CalpontSystemCatalog::ColDataType* types = row.getColTypes(); - - // null values are already skipped. - for (vector::iterator i = fConcatColumns.begin(); i != fConcatColumns.end(); i++) - { - if (row.isNullValue(*i)) - continue; - - int64_t fieldLen = 0; - - switch (types[*i]) - { - case CalpontSystemCatalog::TINYINT: - case CalpontSystemCatalog::SMALLINT: - case CalpontSystemCatalog::MEDINT: - case CalpontSystemCatalog::INT: - case CalpontSystemCatalog::BIGINT: - { - int64_t v = row.getIntField(*i); - - if (v < 0) - fieldLen++; - - while ((v /= 10) != 0) - fieldLen++; - - fieldLen += 1; - break; - } - - case CalpontSystemCatalog::UTINYINT: - case CalpontSystemCatalog::USMALLINT: - case CalpontSystemCatalog::UMEDINT: - case CalpontSystemCatalog::UINT: - case CalpontSystemCatalog::UBIGINT: - { - uint64_t v = row.getUintField(*i); - - while ((v /= 10) != 0) - fieldLen++; - - fieldLen += 1; - break; - } - - case CalpontSystemCatalog::DECIMAL: - case CalpontSystemCatalog::UDECIMAL: - { - fieldLen += 1; - - break; - } - - case CalpontSystemCatalog::CHAR: - case CalpontSystemCatalog::VARCHAR: - case CalpontSystemCatalog::TEXT: - { - fieldLen += row.getConstString(*i).length(); - break; - } - - case CalpontSystemCatalog::DOUBLE: - case CalpontSystemCatalog::UDOUBLE: - case CalpontSystemCatalog::FLOAT: - case CalpontSystemCatalog::UFLOAT: - case CalpontSystemCatalog::LONGDOUBLE: - { - fieldLen = 1; // minimum length - break; - } - - case CalpontSystemCatalog::DATE: - { - fieldLen = 10; // YYYY-MM-DD - break; - } - - case CalpontSystemCatalog::DATETIME: - case CalpontSystemCatalog::TIMESTAMP: - { - fieldLen = 19; // YYYY-MM-DD HH24:MI:SS - // Decimal point and milliseconds - uint64_t colPrecision = row.getPrecision(*i); - - if (colPrecision > 0 && colPrecision < 7) - { - fieldLen += colPrecision + 1; - } - - break; - } - - case CalpontSystemCatalog::TIME: - { - fieldLen = 10; // -HHH:MI:SS - // Decimal point and milliseconds - uint64_t 
colPrecision = row.getPrecision(*i); - - if (colPrecision > 0 && colPrecision < 7) - { - fieldLen += colPrecision + 1; - } - - break; - } - - default: - { - break; - } - } - - rowLen += fieldLen; - } - - return rowLen; -} - -const string JsonArrayAggregator::toString() const -{ - ostringstream oss; - oss << "JsonArray size-" << fGroupConcatLen; - oss << "Concat cols: "; - vector::const_iterator i = fConcatColumns.begin(); - auto j = fConstCols.begin(); - uint64_t groupColCount = fConcatColumns.size() + fConstCols.size(); - - for (uint64_t k = 0; k < groupColCount; k++) - { - if (j != fConstCols.end() && k == j->second) - { - oss << 'c' << " "; - j++; - } - else - { - oss << (*i) << " "; - i++; - } - } - - oss << endl; - - return oss.str(); -} - -JsonArrayAggOrderBy::JsonArrayAggOrderBy() -{ - fRule.fIdbCompare = this; -} - -JsonArrayAggOrderBy::~JsonArrayAggOrderBy() -{ -} - -void JsonArrayAggOrderBy::initialize(const rowgroup::SP_GroupConcat& gcc) -{ - JsonArrayAggregator::initialize(gcc); - - fOrderByCond.resize(0); - - for (uint64_t i = 0; i < gcc->fOrderCond.size(); i++) - fOrderByCond.push_back(IdbSortSpec(gcc->fOrderCond[i].first, gcc->fOrderCond[i].second)); - - fDistinct = gcc->fDistinct; - fRowsPerRG = 128; - fErrorCode = ERR_AGGREGATION_TOO_BIG; - fRm = gcc->fRm; - fSessionMemLimit = gcc->fSessionMemLimit; - - vector >::iterator i = gcc->fGroupCols.begin(); - - while (i != gcc->fGroupCols.end()) - fConcatColumns.push_back((*(i++)).second); - - IdbOrderBy::initialize(gcc->fRowGroup); -} - -uint64_t JsonArrayAggOrderBy::getKeyLength() const -{ - // only distinct the concatenated columns - return fConcatColumns.size(); // cols 0 to fConcatColumns.size() - 1 will be compared -} - -void JsonArrayAggOrderBy::processRow(const rowgroup::Row& row) -{ - // check if this is a distinct row - if (fDistinct && fDistinctMap->find(row.getPointer()) != fDistinctMap->end()) - return; - - // this row is skipped if any concatenated column is null. - if (concatColIsNull(row)) - return; - - auto& orderByQueue = getQueue(); - - // if the row count is less than the limit - if (fCurrentLength < fGroupConcatLen) - { - copyRow(row, &fRow0); - // the RID is no meaning here, use it to store the estimated length. 
- int16_t estLen = lengthEstimate(fRow0); - fRow0.setRid(estLen); - OrderByRow newRow(fRow0, fRule); - orderByQueue.push(newRow); - fCurrentLength += estLen; - - // add to the distinct map - if (fDistinct) - fDistinctMap->insert(fRow0.getPointer()); - - fRowGroup.incRowCount(); - fRow0.nextRow(); - - if (fRowGroup.getRowCount() >= fRowsPerRG) - { - fDataQueue.push(fData); - - uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize(); - - if (!fRm->getMemory(newSize, fSessionMemLimit)) - { - cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @" << __FILE__ << ":" << __LINE__; - throw IDBExcept(fErrorCode); - } - fMemSize += newSize; - - fData.reinit(fRowGroup, fRowsPerRG); - fRowGroup.setData(&fData); - fRowGroup.resetRowGroup(0); - fRowGroup.getRow(0, &fRow0); - } - } - - else if (fOrderByCond.size() > 0 && fRule.less(row.getPointer(), orderByQueue.top().fData)) - { - OrderByRow swapRow = orderByQueue.top(); - fRow1.setData(swapRow.fData); - orderByQueue.pop(); - fCurrentLength -= fRow1.getRelRid(); - fRow2.setData(swapRow.fData); - - if (!fDistinct) - { - copyRow(row, &fRow1); - } - else - { - // only the copyRow does useful work here - fDistinctMap->erase(swapRow.fData); - copyRow(row, &fRow2); - fDistinctMap->insert(swapRow.fData); - } - - int16_t estLen = lengthEstimate(fRow2); - fRow2.setRid(estLen); - fCurrentLength += estLen; - - orderByQueue.push(swapRow); - } -} - -void JsonArrayAggOrderBy::merge(GroupConcator* gc) -{ - JsonArrayAggOrderBy* go = dynamic_cast(gc); - - auto& orderByQueue = getQueue(); - auto& mergeQueue = go->getQueue(); - - while (mergeQueue.empty() == false) - { - const OrderByRow& row = mergeQueue.top(); - - // check if the distinct row already exists - if (fDistinct && fDistinctMap->find(row.fData) != fDistinctMap->end()) - { - ; // no op; - } - - // if the row count is less than the limit - else if (fCurrentLength < fGroupConcatLen) - { - orderByQueue.push(row); - row1.setData(row.fData); - fCurrentLength += row1.getRelRid(); - - // add to the distinct map - if (fDistinct) - fDistinctMap->insert(row.fData); - } - - else if (fOrderByCond.size() > 0 && fRule.less(row.fData, orderByQueue.top().fData)) - { - OrderByRow swapRow = orderByQueue.top(); - row1.setData(swapRow.fData); - orderByQueue.pop(); - fCurrentLength -= row1.getRelRid(); - - if (fDistinct) - { - fDistinctMap->erase(swapRow.fData); - fDistinctMap->insert(row.fData); - } - - row1.setData(row.fData); - fCurrentLength += row1.getRelRid(); - - orderByQueue.push(row); - } - - mergeQueue.pop(); - } -} - -uint8_t* JsonArrayAggOrderBy::getResultImpl(const string&) -{ - ostringstream oss; - bool addSep = false; - - // need to reverse the order - stack rowStack; - auto& orderByQueue = getQueue(); - - while (orderByQueue.size() > 0) - { - rowStack.push(orderByQueue.top()); - orderByQueue.pop(); - } - if (rowStack.size() > 0) - { - oss << '['; - while (rowStack.size() > 0) - { - if (addSep) - oss << ','; - else - addSep = true; - - const OrderByRow& topRow = rowStack.top(); - fRow0.setData(topRow.fData); - outputRow(oss, fRow0); - rowStack.pop(); - } - oss << ']'; - } - - return swapStreamWithStringAndReturnBuf(oss, false); -} - -const string JsonArrayAggOrderBy::toString() const -{ - string baseStr = JsonArrayAggregator::toString(); - - ostringstream oss; - oss << "OrderBy cols: "; - vector::const_iterator i = fOrderByCond.begin(); - - for (; i != fOrderByCond.end(); i++) - oss << "(" << i->fIndex << "," << ((i->fAsc) ? "Asc" : "Desc") << "," - << ((i->fNf) ? 
"null first" : "null last") << ") "; - - if (fDistinct) - oss << endl << " distinct"; - - oss << endl; - - return (baseStr + oss.str()); -} - -JsonArrayAggNoOrder::JsonArrayAggNoOrder() - : fRowsPerRG(128), fErrorCode(ERR_AGGREGATION_TOO_BIG), fMemSize(0), fRm(NULL) -{ -} - -JsonArrayAggNoOrder::~JsonArrayAggNoOrder() -{ - if (fRm) - fRm->returnMemory(fMemSize, fSessionMemLimit); -} - -void JsonArrayAggNoOrder::initialize(const rowgroup::SP_GroupConcat& gcc) -{ - JsonArrayAggregator::initialize(gcc); - - fRowGroup = gcc->fRowGroup; - fRowsPerRG = 128; - fErrorCode = ERR_AGGREGATION_TOO_BIG; - fRm = gcc->fRm; - fSessionMemLimit = gcc->fSessionMemLimit; - - vector >::iterator i = gcc->fGroupCols.begin(); - - while (i != gcc->fGroupCols.end()) - fConcatColumns.push_back((*(i++)).second); - - uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize(); - - if (!fRm->getMemory(newSize, fSessionMemLimit)) - { - cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @" << __FILE__ << ":" << __LINE__; - throw IDBExcept(fErrorCode); - } - fMemSize += newSize; - - fData.reinit(fRowGroup, fRowsPerRG); - fRowGroup.setData(&fData); - fRowGroup.resetRowGroup(0); - fRowGroup.initRow(&fRow); - fRowGroup.getRow(0, &fRow); -} - -void JsonArrayAggNoOrder::processRow(const rowgroup::Row& row) -{ - // if the row count is less than the limit - if (fCurrentLength < fGroupConcatLen && concatColIsNull(row) == false) - { - copyRow(row, &fRow); - - // the RID is no meaning here, use it to store the estimated length. - int16_t estLen = lengthEstimate(fRow); - fRow.setRid(estLen); - fCurrentLength += estLen; - fRowGroup.incRowCount(); - fRow.nextRow(); - - if (fRowGroup.getRowCount() >= fRowsPerRG) - { - uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize(); - - if (!fRm->getMemory(newSize, fSessionMemLimit)) - { - cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @" << __FILE__ << ":" << __LINE__; - throw IDBExcept(fErrorCode); - } - fMemSize += newSize; - - fDataQueue.push(fData); - fData.reinit(fRowGroup, fRowsPerRG); - fRowGroup.setData(&fData); - fRowGroup.resetRowGroup(0); - fRowGroup.getRow(0, &fRow); - } - } -} - -void JsonArrayAggNoOrder::merge(GroupConcator* gc) -{ - JsonArrayAggNoOrder* in = dynamic_cast(gc); - - while (in->fDataQueue.size() > 0) - { - fDataQueue.push(in->fDataQueue.front()); - in->fDataQueue.pop(); - } - - fDataQueue.push(in->fData); - fMemSize += in->fMemSize; - in->fMemSize = 0; -} - -uint8_t* JsonArrayAggNoOrder::getResultImpl(const string&) -{ - ostringstream oss; - bool addSep = false; - if (fRowGroup.getRowCount() > 0) - { - oss << '['; - fDataQueue.push(fData); - - while (fDataQueue.size() > 0) - { - fRowGroup.setData(&fDataQueue.front()); - fRowGroup.getRow(0, &fRow); - - for (uint64_t i = 0; i < fRowGroup.getRowCount(); i++) - { - if (addSep) - oss << ','; - else - addSep = true; - - outputRow(oss, fRow); - fRow.nextRow(); - } - - fDataQueue.pop(); - } - oss << ']'; - } - return swapStreamWithStringAndReturnBuf(oss, false); -} - -const string JsonArrayAggNoOrder::toString() const -{ - return JsonArrayAggregator::toString(); -} - -} // namespace joblist diff --git a/dbcon/joblist/jsonarrayagg.h b/dbcon/joblist/jsonarrayagg.h deleted file mode 100644 index 72ca5361d..000000000 --- a/dbcon/joblist/jsonarrayagg.h +++ /dev/null @@ -1,139 +0,0 @@ -/* Copyright (C) 2022 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - 
the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/** @file */ - -#pragma once - -#include -#include -#include -#include - -#include "groupconcat.h" - -#define EXPORT - -namespace joblist -{ -// forward reference -class JsonArrayAggregator; -class ResourceManager; - -class JsonArrayInfo : public GroupConcatInfo -{ - public: - void prepJsonArray(JobInfo&); - void mapColumns(const rowgroup::RowGroup&) override; - - const std::string toString() const override; - - protected: - uint32_t getColumnKey(const execplan::SRCP& srcp, JobInfo& jobInfo) override; - std::shared_ptr makeMapping(const rowgroup::RowGroup&, const rowgroup::RowGroup&) override; -}; - -class JsonArrayAggregatAgUM : public GroupConcatAgUM -{ - public: - EXPORT explicit JsonArrayAggregatAgUM(rowgroup::SP_GroupConcat&); - EXPORT ~JsonArrayAggregatAgUM() override; - - using rowgroup::GroupConcatAg::merge; - void initialize() override; - void processRow(const rowgroup::Row&) override; - EXPORT void merge(const rowgroup::Row&, int64_t) override; - - EXPORT void getResult(uint8_t*); - EXPORT uint8_t* getResult() override; - - protected: - void applyMapping(const std::shared_ptr&, const rowgroup::Row&) override; -}; - -// JSON_ARRAYAGG base -class JsonArrayAggregator : public GroupConcator -{ - public: - JsonArrayAggregator(); - ~JsonArrayAggregator() override; - - void initialize(const rowgroup::SP_GroupConcat&) override; - void processRow(const rowgroup::Row&) override = 0; - - const std::string toString() const override; - - protected: - bool concatColIsNull(const rowgroup::Row&) override; - void outputRow(std::ostringstream&, const rowgroup::Row&) override; - int64_t lengthEstimate(const rowgroup::Row&) override; -}; - -// For JSON_ARRAYAGG withour distinct or orderby -class JsonArrayAggNoOrder : public JsonArrayAggregator -{ - public: - JsonArrayAggNoOrder(); - ~JsonArrayAggNoOrder() override; - - void initialize(const rowgroup::SP_GroupConcat&) override; - void processRow(const rowgroup::Row&) override; - - using GroupConcator::merge; - void merge(GroupConcator*) override; - using GroupConcator::getResult; - uint8_t* getResultImpl(const std::string& sep) override; - - const std::string toString() const override; - - protected: - rowgroup::RowGroup fRowGroup; - rowgroup::Row fRow; - rowgroup::RGData fData; - std::queue fDataQueue; - uint64_t fRowsPerRG; - uint64_t fErrorCode; - uint64_t fMemSize; - ResourceManager* fRm; - boost::shared_ptr fSessionMemLimit; -}; - -// ORDER BY used in JSON_ARRAYAGG class -class JsonArrayAggOrderBy : public JsonArrayAggregator, public ordering::IdbOrderBy -{ - public: - JsonArrayAggOrderBy(); - ~JsonArrayAggOrderBy() override; - - using ordering::IdbOrderBy::initialize; - void initialize(const rowgroup::SP_GroupConcat&) override; - void processRow(const rowgroup::Row&) override; - uint64_t getKeyLength() const override; - - using GroupConcator::merge; - void merge(GroupConcator*) override; - using GroupConcator::getResult; - uint8_t* getResultImpl(const std::string& sep) override; - - const std::string toString() const override; - - protected: -}; - -} // namespace joblist - 
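With jsonarrayagg.cpp and jsonarrayagg.h deleted here, DISTINCT handling for both GROUP_CONCAT and JSON_ARRAYAGG is consolidated into GroupConcatOrderBy via the Hasher/Eq pair and the unordered_map-backed DistinctMap introduced earlier in this diff. The sketch below only illustrates that general pattern (custom hash and equality over opaque row handles, keyed on the columns that take part in the distinct check); RowHandle, RowHasher and the byte-wise FNV hash are illustrative stand-ins, not ColumnStore types.

#include <cstdint>
#include <cstring>
#include <iostream>
#include <unordered_map>
#include <vector>

// Illustrative stand-in for a row handle: a pointer into packed row data
// plus the number of leading bytes that form the DISTINCT key.
struct RowHandle
{
  const uint8_t* data;
  std::size_t keyBytes;
};

// FNV-1a over the key bytes; a real hasher would hash decoded column values.
struct RowHasher
{
  std::size_t operator()(const RowHandle& r) const
  {
    uint64_t h = 14695981039346656037ull;
    for (std::size_t i = 0; i < r.keyBytes; ++i)
    {
      h ^= r.data[i];
      h *= 1099511628211ull;
    }
    return static_cast<std::size_t>(h);
  }
};

struct RowEq
{
  bool operator()(const RowHandle& a, const RowHandle& b) const
  {
    return a.keyBytes == b.keyBytes && std::memcmp(a.data, b.data, a.keyBytes) == 0;
  }
};

// Maps a distinct key to the index of the first row that produced it.
using DistinctMap = std::unordered_map<RowHandle, uint64_t, RowHasher, RowEq>;

int main()
{
  std::vector<std::vector<uint8_t>> rows = {{1, 2, 3}, {3, 4, 5}, {1, 2, 9}};
  DistinctMap seen;

  for (uint64_t i = 0; i < rows.size(); ++i)
  {
    RowHandle h{rows[i].data(), 2};  // only the first two bytes are key columns
    auto [it, inserted] = seen.emplace(h, i);

    if (inserted)
      std::cout << "row " << i << ": new distinct key\n";
    else
      std::cout << "row " << i << ": duplicates row " << it->second << "\n";
  }
  return 0;
}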
-#undef EXPORT diff --git a/dbcon/joblist/resourcemanager.cpp b/dbcon/joblist/resourcemanager.cpp index 91831f366..3c684697d 100644 --- a/dbcon/joblist/resourcemanager.cpp +++ b/dbcon/joblist/resourcemanager.cpp @@ -237,9 +237,6 @@ ResourceManager::ResourceManager(bool runningInExeMgr, config::Config* aConfig) else fUseHdfs = false; - fAllowedDiskAggregation = - getBoolVal(fRowAggregationStr, "AllowDiskBasedAggregation", defaultAllowDiskAggregation); - if (!load_encryption_keys()) { Logger log; @@ -390,4 +387,9 @@ bool ResourceManager::getMemory(int64_t amount, bool patience) return ret1; } +bool ResourceManager::getAllowDiskAggregation() const +{ + return getBoolVal(fRowAggregationStr, "AllowDiskBasedAggregation", defaultAllowDiskAggregation); +} + } // namespace joblist diff --git a/dbcon/joblist/resourcemanager.h b/dbcon/joblist/resourcemanager.h index eacbceb3c..396cbe2c5 100644 --- a/dbcon/joblist/resourcemanager.h +++ b/dbcon/joblist/resourcemanager.h @@ -156,10 +156,7 @@ class ResourceManager return getIntVal(fExeMgrStr, "ExecQueueSize", defaultEMExecQueueSize); } - bool getAllowDiskAggregation() const - { - return fAllowedDiskAggregation; - } + bool getAllowDiskAggregation() const; uint64_t getDECConnectionsPerQuery() const { @@ -528,7 +525,6 @@ class ResourceManager bool isExeMgr; bool fUseHdfs; - bool fAllowedDiskAggregation{false}; uint64_t fDECConnectionsPerQuery; }; diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 595ce634b..b837e837e 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -5632,7 +5632,7 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID) { handleException(std::current_exception(), logging::tupleAggregateStepErr, logging::ERR_AGGREGATION_TOO_BIG, - "TupleAggregateStep::threadedAggregateRowGroups()[" + std::to_string(threadID) + "]"); + "TupleAggregateStep::threadedAggregateRowGroups()"); fEndOfResult = true; fDoneAggregate = true; } diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index bc8596d1b..a7667ba66 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -71,7 +71,6 @@ using namespace cal_impl_if; #include "functioncolumn.h" #include "groupconcatcolumn.h" #include "intervalcolumn.h" -#include "jsonarrayaggcolumn.h" #include "logicoperator.h" #include "outerjoinonfilter.h" #include "predicateoperator.h" @@ -96,7 +95,6 @@ const uint64_t SUB_BIT = 0x02; const uint64_t AF_BIT = 0x04; const uint64_t CORRELATED = 0x08; - // In certain cases, gp_walk is called recursively. When done so, // we need to bookmark the rcWorkStack for those cases where a constant // expression such as 1=1 is used in an if statement or function call. 
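The resourcemanager.cpp/.h hunks above change getAllowDiskAggregation() from returning a value cached at construction to re-reading RowAggregation/AllowDiskBasedAggregation on every call; that is what lets the MCOL-5852 test added later in this diff flip the setting with mcsSetConfig between statements and have it take effect without a restart. Below is a minimal sketch of that trade-off using a toy Config class, not the real config::Config API:

#include <iostream>
#include <map>
#include <string>

// Toy stand-in for a configuration source that can change at runtime
// (e.g. after an external tool rewrites the config file).
class Config
{
 public:
  void set(const std::string& section, const std::string& key, bool v)
  {
    values_[section + "." + key] = v;
  }
  bool getBool(const std::string& section, const std::string& key, bool deflt) const
  {
    auto it = values_.find(section + "." + key);
    return it == values_.end() ? deflt : it->second;
  }

 private:
  std::map<std::string, bool> values_;
};

class ResourceManagerSketch
{
 public:
  explicit ResourceManagerSketch(const Config& cfg) : cfg_(cfg) {}

  // Re-read the setting on every call, so runtime config changes are honored.
  bool getAllowDiskAggregation() const
  {
    return cfg_.getBool("RowAggregation", "AllowDiskBasedAggregation", false);
  }

 private:
  const Config& cfg_;
};

int main()
{
  Config cfg;
  ResourceManagerSketch rm(cfg);
  std::cout << std::boolalpha << rm.getAllowDiskAggregation() << "\n";  // false (default)
  cfg.set("RowAggregation", "AllowDiskBasedAggregation", true);        // simulate mcsSetConfig
  std::cout << rm.getAllowDiskAggregation() << "\n";                   // true, no restart needed
  return 0;
}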
@@ -167,7 +165,7 @@ bool itemDisablesWrapping(Item* item, gp_walk_info& gwi); void pushReturnedCol(gp_walk_info& gwi, Item* from, SRCP rc) { uint32_t i; - for ( i = 0; i < gwi.processed.size(); i++) + for (i = 0; i < gwi.processed.size(); i++) { Item* ith = gwi.processed[i].first; @@ -352,7 +350,8 @@ cal_impl_if::gp_walk_info::~gp_walk_info() delete ptWorkStack.top(); ptWorkStack.pop(); } - for (uint32_t i=0;i(item); item = (Item*)*ref_item->ref; } - if (item->type() == Item::FIELD_ITEM || item->type() == Item::CONST_ITEM || item->type() == Item::NULL_ITEM) + if (item->type() == Item::FIELD_ITEM || item->type() == Item::CONST_ITEM || + item->type() == Item::NULL_ITEM) { return true; } @@ -1610,7 +1611,6 @@ uint32_t buildJoin(gp_walk_info& gwi, List& join_list, ParseTree* pt = new ParseTree(onFilter); outerJoinStack.push(pt); } - } else // inner join { @@ -1706,7 +1706,7 @@ bool buildRowColumnFilter(gp_walk_info* gwip, RowColumn* rhs, RowColumn* lhs, It // two entries have been popped from the stack already: lhs and rhs stack tmpStack; vector valVec; - vector heldOutVals; // these vals are not rhs/lhs and need to be freed + vector heldOutVals; // these vals are not rhs/lhs and need to be freed tmpStack.push(rhs); tmpStack.push(lhs); assert(gwip->rcWorkStack.size() >= ifp->argument_count() - 2); @@ -2116,7 +2116,7 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) sop.reset(new PredicateOperator(eqop)); SRCP scsp = gwip->scsp; idbassert(scsp.get() != nullptr); - //sop->setOpType(gwip->scsp->resultType(), rhs->resultType()); + // sop->setOpType(gwip->scsp->resultType(), rhs->resultType()); sop->setOpType(scsp->resultType(), rhs->resultType()); ConstantFilter* cf = 0; @@ -3425,8 +3425,9 @@ ReturnedColumn* wrapIntoAggregate(ReturnedColumn* rc, gp_walk_info& gwi, Item* b ac->charsetNumber(rc->charsetNumber()); ac->orderPos(rc->orderPos()); uint32_t i; - for(i=0; i < gwi.processed.size() && !gwi.processed[i].first->eq(baseItem, false);i++) - { } + for (i = 0; i < gwi.processed.size() && !gwi.processed[i].first->eq(baseItem, false); i++) + { + } if (i < gwi.processed.size()) { ac->expressionId(gwi.processed[i].second); @@ -3441,7 +3442,6 @@ ReturnedColumn* wrapIntoAggregate(ReturnedColumn* rc, gp_walk_info& gwi, Item* b return ac; } - ReturnedColumn* buildReturnedColumnNull(gp_walk_info& gwi) { if (gwi.condPush) @@ -3875,7 +3875,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo // Could have it set if there are aggregation funcs as this function arguments. gwi.fatalParseError = false; - //ReturnedColumn* rc = buildAggFrmTempField(sfitempp[0], gwi); + // ReturnedColumn* rc = buildAggFrmTempField(sfitempp[0], gwi); ReturnedColumn* rc = buildReturnedColumn(sfitempp[0], gwi, nonSupport); if (rc) lhs = new ParseTree(rc); @@ -3895,7 +3895,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo // Could have it set if there are aggregation funcs as this function arguments. 
gwi.fatalParseError = false; - //ReturnedColumn* rc = buildAggFrmTempField(sfitempp[1], gwi); + // ReturnedColumn* rc = buildAggFrmTempField(sfitempp[1], gwi); ReturnedColumn* rc = buildReturnedColumn(sfitempp[1], gwi, nonSupport); if (rc) rhs = new ParseTree(rc); @@ -4019,8 +4019,8 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo int32_t leftColWidth = leftColType.colWidth; int32_t rightColWidth = rightColType.colWidth; - if ((leftColWidth == datatypes::MAXDECIMALWIDTH || rightColWidth == datatypes::MAXDECIMALWIDTH) - && datatypes::isDecimal(mysqlType.colDataType)) + if ((leftColWidth == datatypes::MAXDECIMALWIDTH || rightColWidth == datatypes::MAXDECIMALWIDTH) && + datatypes::isDecimal(mysqlType.colDataType)) { mysqlType.colWidth = datatypes::MAXDECIMALWIDTH; @@ -4078,7 +4078,7 @@ ReturnedColumn* buildArithmeticColumnBody(Item_func* item, gp_walk_info& gwi, bo strcasecmp(ac->alias().c_str(), gwi.returnedCols[i]->alias().c_str()) == 0) { ac->expressionId(gwi.returnedCols[i]->expressionId()); - isOnSelectList = true; + isOnSelectList = true; break; } } @@ -4114,7 +4114,8 @@ ReturnedColumn* buildArithmeticColumn(Item_func* item, gp_walk_info& gwi, bool& return rc; } -ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport, bool selectBetweenIn) +ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool& nonSupport, + bool selectBetweenIn) { if (get_fe_conn_info_ptr() == NULL) { @@ -4125,7 +4126,7 @@ ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool& cal_connection_info* ci = static_cast(get_fe_conn_info_ptr()); string funcName = ifp->func_name(); - if ( nullptr != dynamic_cast(ifp)) + if (nullptr != dynamic_cast(ifp)) { // the condition above is the only way to recognize this particular case. funcName = "concat_operator_oracle"; @@ -4356,9 +4357,9 @@ ReturnedColumn* buildFunctionColumnBody(Item_func* ifp, gp_walk_info& gwi, bool& if (mayHasBoolArg && isBoolType) rc = buildBooleanConstantColumn(ifp->arguments()[i], gwi, nonSupport); else - { + { rc = buildReturnedColumn(ifp->arguments()[i], gwi, nonSupport); - } + } // MCOL-1510 It must be a temp table field, so find the corresponding column. 
if (!rc && ifp->arguments()[i]->type() == Item::REF_ITEM) @@ -5175,7 +5176,7 @@ void analyzeForImplicitGroupBy(Item* item, gp_walk_info& gwi) if (item->type() == Item::FUNC_ITEM) { Item_func* ifp = static_cast(item); - for(uint32_t i = 0;iargument_count() && !gwi.implicitExplicitGroupBy;i++) + for (uint32_t i = 0; i < ifp->argument_count() && !gwi.implicitExplicitGroupBy; i++) { analyzeForImplicitGroupBy(ifp->arguments()[i], gwi); } @@ -5224,7 +5225,7 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi) } else if (isp->sum_func() == Item_sum::JSON_ARRAYAGG_FUNC) { - ac = new JsonArrayAggColumn(gwi.sessionid); + ac = new GroupConcatColumn(gwi.sessionid, true); } else if (isp->sum_func() == Item_sum::UDF_SUM_FUNC) { @@ -5402,7 +5403,7 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi) } rowCol->columnVec(selCols); - (dynamic_cast(ac))->orderCols(orderCols); + (dynamic_cast(ac))->orderCols(orderCols); parm.reset(rowCol); ac->aggParms().push_back(parm); @@ -5410,7 +5411,7 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi) { string separator; separator.assign(gc->get_separator()->ptr(), gc->get_separator()->length()); - (dynamic_cast(ac))->separator(separator); + (dynamic_cast(ac))->separator(separator); } } else if (isSupportedAggregateWithOneConstArg(isp, sfitempp)) @@ -5482,21 +5483,23 @@ ReturnedColumn* buildAggregateColumnBody(Item* item, gp_walk_info& gwi) { //@bug5229. handle constant function on aggregate argument ac->constCol(SRCP(rc)); - // XXX: this skips restoration of clauseType. + // XXX: this skips restoration of clauseType. break; } - // the "rc" can be in gwi.no_parm_func_list. erase it from that list and - // then delete it. - // kludge, I know. - uint32_t i; + // the "rc" can be in gwi.no_parm_func_list. erase it from that list and + // then delete it. + // kludge, I know. + uint32_t i; - for (i = 0; gwi.no_parm_func_list[i] != rc && i < gwi.no_parm_func_list.size(); i++) { } + for (i = 0; gwi.no_parm_func_list[i] != rc && i < gwi.no_parm_func_list.size(); i++) + { + } - if (i < gwi.no_parm_func_list.size()) - { + if (i < gwi.no_parm_func_list.size()) + { gwi.no_parm_func_list.erase(gwi.no_parm_func_list.begin() + i); delete rc; - } + } } } @@ -6003,7 +6006,7 @@ void gp_walk(const Item* item, void* arg) if (ifp) { - // XXX: this looks awfuly wrong. + // XXX: this looks awfuly wrong. SimpleColumn* scp = buildSimpleColumn(ifp, *gwip); if (!scp) @@ -6012,7 +6015,7 @@ void gp_walk(const Item* item, void* arg) string aliasTableName(scp->tableAlias()); scp->tableAlias(aliasTableName); gwip->rcWorkStack.push(scp->clone()); - boost::shared_ptr scsp(scp); + boost::shared_ptr scsp(scp); gwip->scsp = scsp; gwip->funcName.clear(); @@ -6551,7 +6554,7 @@ void gp_walk(const Item* item, void* arg) } else if (col->type() == Item::FIELD_ITEM && gwip->clauseType == HAVING) { - //ReturnedColumn* rc = buildAggFrmTempField(const_cast(item), *gwip); + // ReturnedColumn* rc = buildAggFrmTempField(const_cast(item), *gwip); ReturnedColumn* rc = buildReturnedColumn(const_cast(item), *gwip, gwip->fatalParseError); if (rc) gwip->rcWorkStack.push(rc); @@ -6566,7 +6569,7 @@ void gp_walk(const Item* item, void* arg) SimpleColumn* thisSC = dynamic_cast(rc); if (thisSC) { - gwip->scsp.reset(thisSC->clone()); + gwip->scsp.reset(thisSC->clone()); } if (!rc && !cando) { @@ -6785,14 +6788,14 @@ void parse_item(Item* item, vector& field_vec, bool& hasNonSupportI // MCOL-1510. 
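The buildAggregateColumnBody() hunk above is the plugin-side half of the unification: the JSON_ARRAYAGG_FUNC branch now constructs GroupConcatColumn(gwi.sessionid, true) instead of a dedicated JsonArrayAggColumn, and the later dynamic_casts for orderCols()/separator() target that single class. A self-contained sketch of the dispatch pattern only follows; the *Sketch types are placeholders and not the real execplan classes.

#include <cstdint>
#include <iostream>
#include <memory>

// Toy stand-ins; the real types live in dbcon/execplan and are not reproduced here.
class AggregateColumnSketch
{
 public:
  virtual ~AggregateColumnSketch() = default;
};

// One class serves both aggregates; a constructor flag selects the behavior.
class GroupConcatColumnSketch : public AggregateColumnSketch
{
 public:
  explicit GroupConcatColumnSketch(uint32_t sessionID, bool isJsonArrayAgg = false)
   : sessionID_(sessionID), isJsonArrayAgg_(isJsonArrayAgg)
  {
  }
  uint32_t sessionID() const { return sessionID_; }
  bool isJsonArrayAgg() const { return isJsonArrayAgg_; }

 private:
  uint32_t sessionID_;
  bool isJsonArrayAgg_;
};

enum class SumFunc { GROUP_CONCAT_FUNC, JSON_ARRAYAGG_FUNC };

// Mirrors the builder branch: both SQL functions map to the same class.
std::unique_ptr<AggregateColumnSketch> buildAggregate(SumFunc f, uint32_t sessionID)
{
  if (f == SumFunc::JSON_ARRAYAGG_FUNC)
    return std::make_unique<GroupConcatColumnSketch>(sessionID, true);
  return std::make_unique<GroupConcatColumnSketch>(sessionID);
}

int main()
{
  auto ac = buildAggregate(SumFunc::JSON_ARRAYAGG_FUNC, 1);
  auto* gcc = dynamic_cast<GroupConcatColumnSketch*>(ac.get());
  std::cout << std::boolalpha << (gcc && gcc->isJsonArrayAgg()) << "\n";  // true
  return 0;
}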
This could be a non-supported function // argument in form of a temp_table_field, so check // and set hasNonSupportItem if it is so. - //ReturnedColumn* rc = NULL; - //if (gwi) + // ReturnedColumn* rc = NULL; + // if (gwi) // rc = buildAggFrmTempField(ref, *gwi); - //if (!rc) + // if (!rc) //{ - Item_field* ifp = static_cast(*(ref->ref)); - field_vec.push_back(ifp); + Item_field* ifp = static_cast(*(ref->ref)); + field_vec.push_back(ifp); //} break; } @@ -7512,7 +7515,7 @@ int processWhere(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, const s if (!gwi.rcWorkStack.empty()) { - while(!gwi.rcWorkStack.empty()) + while (!gwi.rcWorkStack.empty()) { ReturnedColumn* t = gwi.rcWorkStack.top(); delete t; @@ -7521,7 +7524,7 @@ int processWhere(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, const s } if (!gwi.ptWorkStack.empty()) { - while(!gwi.ptWorkStack.empty()) + while (!gwi.ptWorkStack.empty()) { ParseTree* t = gwi.ptWorkStack.top(); delete t; @@ -7529,7 +7532,6 @@ int processWhere(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, const s } } - return 0; } @@ -7818,7 +7820,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } gwi.clauseType = SELECT; - SELECT_LEX* oldSelectLex = gwi.select_lex; // XXX: SZ: should it be restored in case of error return? + SELECT_LEX* oldSelectLex = gwi.select_lex; // XXX: SZ: should it be restored in case of error return? gwi.select_lex = &select_lex; #ifdef DEBUG_WALK_COND { @@ -7931,7 +7933,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i ReturnedColumn* rc = wrapIntoAggregate(sc, gwi, baseItem); SRCP sprc(rc); - pushReturnedCol(gwi, baseItem, sprc); + pushReturnedCol(gwi, baseItem, sprc); gwi.columnMap.insert( CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), sprc)); @@ -7968,7 +7970,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i // add this agg col to returnedColumnList boost::shared_ptr spac(ac); - pushReturnedCol(gwi, item, spac); + pushReturnedCol(gwi, item, spac); break; } @@ -8027,7 +8029,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (!hasNonSupportItem && ifp->const_item() && !(parseInfo & AF_BIT) && tmpVec.size() == 0) { srcp.reset(buildReturnedColumn(item, gwi, gwi.fatalParseError)); - pushReturnedCol(gwi, item, srcp); + pushReturnedCol(gwi, item, srcp); if (ifp->name.length) srcp->alias(ifp->name.str); @@ -8035,7 +8037,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i continue; } - pushReturnedCol(gwi, item, srcp); + pushReturnedCol(gwi, item, srcp); } else // This was a vtable post-process block { @@ -8057,7 +8059,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (ifp->name.length) cc->alias(ifp->name.str); - pushReturnedCol(gwi, ifp, srcp); + pushReturnedCol(gwi, ifp, srcp); // clear the error set by buildFunctionColumn gwi.fatalParseError = false; @@ -8135,7 +8137,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (item->name.length) srcp->alias(item->name.str); - pushReturnedCol(gwi, item, srcp); + pushReturnedCol(gwi, item, srcp); } break; @@ -8159,7 +8161,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i else { SRCP srcp(buildReturnedColumn(item, gwi, gwi.fatalParseError)); - pushReturnedCol(gwi, item, srcp); + pushReturnedCol(gwi, item, srcp); if (item->name.length) srcp->alias(item->name.str); 
@@ -8255,7 +8257,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i return ER_CHECK_NOT_IMPLEMENTED; } - pushReturnedCol(gwi, item, srcp); + pushReturnedCol(gwi, item, srcp); break; } case Item::TYPE_HOLDER: @@ -9105,7 +9107,6 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i int cp_get_table_plan(THD* thd, SCSEP& csep, cal_table_info& ti, long timeZone) { - SubQueryChainHolder chainHolder; bool allocated = false; gp_walk_info* gwi; diff --git a/mysql-test/columnstore/bugfixes/MCOL-5852-group-concat-memory-accounting.result b/mysql-test/columnstore/bugfixes/MCOL-5852-group-concat-memory-accounting.result new file mode 100644 index 000000000..1ce0c8bd6 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/MCOL-5852-group-concat-memory-accounting.result @@ -0,0 +1,44 @@ +DROP DATABASE IF EXISTS mcol_5852; +CREATE DATABASE mcol_5852; +USE mcol_5852; +CREATE TABLE gc ( +id INTEGER NOT NULL, +longtxt TEXT NOT NULL +) ENGINE=ColumnStore; +SET max_recursive_iterations=100000; +INSERT INTO gc ( +WITH RECURSIVE series AS ( +SELECT 1 AS id, REPEAT('=', 1024) AS longtxt +UNION ALL +SELECT id + 1 AS id, longtxt FROM series WHERE id < 50000 +) SELECT id, longtxt FROM series); +SET columnstore_um_mem_limit=64; +SELECT id, GROUP_CONCAT(longtxt) FROM gc GROUP BY 1 ORDER BY 1 LIMIT 10; +ERROR HY000: Internal error: TupleAggregateStep::threadedAggregateRowGroups() MCS-2003: Aggregation/Distinct memory limit is exceeded. +SET columnstore_um_mem_limit=512; +SELECT id, GROUP_CONCAT(longtxt) FROM gc GROUP BY 1 ORDER BY 1 LIMIT 10; +id GROUP_CONCAT(longtxt) +1 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================ +2 
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================ +3 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================ +4 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================ +5 
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================ +6 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================ +7 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================ +8 
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================ +9 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================ +10 ================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================ +SET columnstore_um_mem_limit=64; +SELECT id, GROUP_CONCAT(longtxt) FROM gc GROUP BY 1 ORDER BY 1 LIMIT 10; +id GROUP_CONCAT(longtxt) +1 
[remaining expected-result rows (ids 2-10) elided: each is the row id followed by its GROUP_CONCAT value, a long run of '=' characters]
+DROP DATABASE mcol_5852;
diff --git a/mysql-test/columnstore/bugfixes/MCOL-5852-group-concat-memory-accounting.test b/mysql-test/columnstore/bugfixes/MCOL-5852-group-concat-memory-accounting.test
new file mode 100644
index 000000000..f504edf29
--- /dev/null
+++ b/mysql-test/columnstore/bugfixes/MCOL-5852-group-concat-memory-accounting.test
@@ -0,0 +1,36 @@
+--source ../include/have_columnstore.inc
+
+--disable_warnings
+DROP DATABASE IF EXISTS mcol_5852;
+--enable_warnings
+
+CREATE DATABASE mcol_5852;
+USE mcol_5852;
+
+CREATE TABLE gc (
+  id INTEGER NOT NULL,
+  longtxt TEXT NOT NULL
+) ENGINE=ColumnStore;
+
+SET max_recursive_iterations=100000;
+INSERT INTO gc (
+  WITH RECURSIVE series AS (
+    SELECT 1 AS id, REPEAT('=', 1024) AS longtxt
+    UNION ALL
+    SELECT id + 1 AS id, longtxt FROM series WHERE id < 50000
+  ) SELECT id, longtxt FROM series);
+
+SET columnstore_um_mem_limit=64;
+--exec /usr/bin/mcsSetConfig RowAggregation AllowDiskBasedAggregation N
+--error 1815
+SELECT id, GROUP_CONCAT(longtxt) FROM gc GROUP BY 1 ORDER BY 1 LIMIT 10;
+
+SET columnstore_um_mem_limit=512;
+SELECT id, GROUP_CONCAT(longtxt) FROM gc GROUP BY 1 ORDER BY 1 LIMIT 10;
+
+SET columnstore_um_mem_limit=64;
+--exec /usr/bin/mcsSetConfig RowAggregation AllowDiskBasedAggregation Y
+SELECT id, GROUP_CONCAT(longtxt) FROM gc GROUP BY 1 ORDER BY 1 LIMIT 10;
+
+# cleanup
+DROP DATABASE mcol_5852;
diff --git a/utils/messageqcpp/bytestream.cpp b/utils/messageqcpp/bytestream.cpp
index 5d6bde298..2546cde09 100644
--- a/utils/messageqcpp/bytestream.cpp
+++ b/utils/messageqcpp/bytestream.cpp
@@ -431,7 +431,6 @@ ByteStream& ByteStream::operator>>(utils::NullString& s)
   return *this;
 }
-
 ByteStream& ByteStream::operator>>(uint8_t*& bpr)
 {
   peek(bpr);
diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp
index 34203fee4..cb968c251 100644
--- a/utils/rowgroup/rowaggregation.cpp
+++ b/utils/rowgroup/rowaggregation.cpp
@@ -35,7 +35,6 @@
 #include "mcs_basic_types.h"
 #include "resourcemanager.h"
 #include "groupconcat.h"
-#include "jsonarrayagg.h"
 #include "blocksize.h"
 #include "errorcodes.h"
@@ -537,6 +536,7 @@ RowAggregation::RowAggregation(const RowAggregation& rhs)
   , fRm(rhs.fRm)
   , fSessionMemLimit(rhs.fSessionMemLimit)
   , fRollupFlag(rhs.fRollupFlag)
+  , fGroupConcat(rhs.fGroupConcat)
 {
   fGroupByCols.assign(rhs.fGroupByCols.begin(), rhs.fGroupByCols.end());
   fFunctionCols.assign(rhs.fFunctionCols.begin(), rhs.fFunctionCols.end());
@@ -661,14 +661,17 @@ void
RowAggregation::resetUDAF(RowUDAFFunctionCol* rowUDAF, uint64_t funcColsIdx //------------------------------------------------------------------------------ void RowAggregation::initialize(bool hasGroupConcat) { + if (hasGroupConcat) + { + fRowGroupOut->setUseAggregateDataStore(true, fGroupConcat); + } // Calculate the length of the hashmap key. fAggMapKeyCount = fGroupByCols.size(); bool disk_agg = fRm ? fRm->getAllowDiskAggregation() : false; bool allow_gen = true; for (auto& fun : fFunctionCols) { - if (fun->fAggFunction == ROWAGG_UDAF || fun->fAggFunction == ROWAGG_GROUP_CONCAT || - fun->fAggFunction == ROWAGG_JSON_ARRAY) + if (fun->fAggFunction == ROWAGG_UDAF) { allow_gen = false; break; @@ -757,8 +760,7 @@ void RowAggregation::aggReset() bool allow_gen = true; for (auto& fun : fFunctionCols) { - if (fun->fAggFunction == ROWAGG_UDAF || fun->fAggFunction == ROWAGG_GROUP_CONCAT || - fun->fAggFunction == ROWAGG_JSON_ARRAY) + if (fun->fAggFunction == ROWAGG_UDAF) { allow_gen = false; break; @@ -1884,9 +1886,9 @@ void RowAggregation::mergeEntries(const Row& rowIn) case ROWAGG_DUP_AVG: case ROWAGG_DUP_STATS: case ROWAGG_DUP_UDAF: - case ROWAGG_CONSTANT: + case ROWAGG_CONSTANT: break; case ROWAGG_JSON_ARRAY: - case ROWAGG_GROUP_CONCAT: break; + case ROWAGG_GROUP_CONCAT: mergeGroupConcat(rowIn, colOut); break; case ROWAGG_UDAF: doUDAF(rowIn, colOut, colOut, colOut + 1, i); break; @@ -2138,6 +2140,12 @@ void RowAggregation::mergeStatistics(const Row& rowIn, uint64_t colOut, uint64_t colAux + 1); } +void RowAggregation::mergeGroupConcat(const Row& rowIn, uint64_t colOut) +{ + auto* gccAg = fRow.getAggregateData(colOut); + gccAg->merge(rowIn, colOut); +} + void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int64_t colAux, uint64_t& funcColsIdx, std::vector* rgContextColl) { @@ -2540,7 +2548,6 @@ RowAggregationUM::RowAggregationUM(const RowAggregationUM& rhs) , fExpression(rhs.fExpression) , fTotalMemUsage(rhs.fTotalMemUsage) , fConstantAggregate(rhs.fConstantAggregate) - , fGroupConcat(rhs.fGroupConcat) , fLastMemUsage(rhs.fLastMemUsage) { } @@ -2626,28 +2633,19 @@ void RowAggregationUM::attachGroupConcatAg() { if (fGroupConcat.size() > 0) { - uint8_t* data = fRow.getData(); - uint64_t i = 0, j = 0; + uint64_t gc_idx = 0; - for (; i < fFunctionColGc.size(); i++) + for (uint64_t i = 0; i < fFunctionColGc.size(); i++) { + if (fFunctionColGc[i]->fAggFunction != ROWAGG_GROUP_CONCAT && + fFunctionColGc[i]->fAggFunction != ROWAGG_JSON_ARRAY) + { + continue; + } int64_t colOut = fFunctionColGc[i]->fOutputColumnIndex; - - if (fFunctionColGc[i]->fAggFunction == ROWAGG_GROUP_CONCAT) - { - // save the object's address in the result row - SP_GroupConcatAg gcc(new joblist::GroupConcatAgUM(fGroupConcat[j++])); - fGroupConcatAg.push_back(gcc); - *((GroupConcatAg**)(data + fRow.getOffset(colOut))) = gcc.get(); - } - - if (fFunctionColGc[i]->fAggFunction == ROWAGG_JSON_ARRAY) - { - // save the object's address in the result row - SP_GroupConcatAg gcc(new joblist::JsonArrayAggregatAgUM(fGroupConcat[j++])); - fGroupConcatAg.push_back(gcc); - *((GroupConcatAg**)(data + fRow.getOffset(colOut))) = gcc.get(); - } + joblist::SP_GroupConcatAg gcc(new joblist::GroupConcatAg( + fGroupConcat[gc_idx++], fFunctionColGc[i]->fAggFunction == ROWAGG_JSON_ARRAY)); + fRow.setAggregateData(gcc, colOut); } } } @@ -2706,14 +2704,9 @@ void RowAggregationUM::updateEntry(const Row& rowIn, std::vectorprocessRow(rowIn); -} - -void RowAggregationUM::doJsonAgg(const Row& rowIn, int64_t, int64_t o) -{ - uint8_t* 
data = fRow.getData(); - joblist::JsonArrayAggregatAgUM* gccAg = *((joblist::JsonArrayAggregatAgUM**)(data + fRow.getOffset(o))); + auto* gccAg = fRow.getAggregateData(o); gccAg->processRow(rowIn); } @@ -4158,30 +4143,17 @@ void RowAggregationUM::setGroupConcatString() for (uint64_t i = 0; i < fRowGroupOut->getRowCount(); i++, fRow.nextRow()) { - for (uint64_t j = 0; j < fFunctionCols.size(); j++) + for (const auto& fcall : fFunctionCols) { - uint8_t* data = fRow.getData(); - - if (fFunctionCols[j]->fAggFunction == ROWAGG_GROUP_CONCAT) + if (fcall->fAggFunction != ROWAGG_GROUP_CONCAT && fcall->fAggFunction != ROWAGG_JSON_ARRAY) { - uint8_t* buff = data + fRow.getOffset(fFunctionCols[j]->fOutputColumnIndex); - uint8_t* gcString; - joblist::GroupConcatAgUM* gccAg = *((joblist::GroupConcatAgUM**)buff); - gcString = gccAg->getResult(); - utils::ConstString str((char*)gcString, gcString ? strlen((const char*)gcString) : 0); - fRow.setStringField(str, fFunctionCols[j]->fOutputColumnIndex); - // gccAg->getResult(buff); + continue; } - if (fFunctionCols[j]->fAggFunction == ROWAGG_JSON_ARRAY) - { - uint8_t* buff = data + fRow.getOffset(fFunctionCols[j]->fOutputColumnIndex); - uint8_t* gcString; - joblist::JsonArrayAggregatAgUM* gccAg = *((joblist::JsonArrayAggregatAgUM**)buff); - gcString = gccAg->getResult(); - utils::ConstString str((char*)gcString, gcString ? strlen((char*)gcString) : 0); - fRow.setStringField(str, fFunctionCols[j]->fOutputColumnIndex); - } + auto* gccAg = fRow.getAggregateData(fcall->fOutputColumnIndex); + uint8_t* gcString = gccAg->getResult(); + utils::ConstString str((char*)gcString, gcString ? strlen((const char*)gcString) : 0); + fRow.setStringField(str, fcall->fOutputColumnIndex); } } } @@ -4306,14 +4278,9 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn, std::vectormerge(rowIn, i); -} - -void RowAggregationUMP2::doJsonAgg(const Row& rowIn, int64_t i, int64_t o) -{ - uint8_t* data = fRow.getData(); - joblist::JsonArrayAggregatAgUM* gccAg = *((joblist::JsonArrayAggregatAgUM**)(data + fRow.getOffset(o))); + auto* gccAg = fRow.getAggregateData(o); gccAg->merge(rowIn, i); } @@ -4803,14 +4762,9 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn, std::vectormerge(rowIn, i); } -void RowAggregationSubDistinct::doJsonAgg(const Row& rowIn, int64_t i, int64_t o) -{ - uint8_t* data = fRow.getData(); - joblist::JsonArrayAggregatAgUM* gccAg = *((joblist::JsonArrayAggregatAgUM**)(data + fRow.getOffset(o))); - gccAg->merge(rowIn, i); -} //------------------------------------------------------------------------------ // Constructor / destructor //------------------------------------------------------------------------------ @@ -5129,12 +5076,122 @@ void RowAggregationMultiDistinct::doDistinctAggregation_rowVec( fOrigFunctionCols = nullptr; } -GroupConcatAg::GroupConcatAg(SP_GroupConcat& gcc) : fGroupConcat(gcc) +void GroupConcat::serialize(messageqcpp::ByteStream& bs) const { + uint64_t size; + + size = fGroupCols.size(); + bs << size; + for (const auto& [k, v] : fGroupCols) + { + bs << k; + bs << v; + } + size = fOrderCols.size(); + bs << size; + for (const auto& [k, v] : fOrderCols) + { + bs << k; + bs << static_cast(v); + } + bs << fSeparator; + size = fConstCols.size(); + bs << size; + for (const auto& [k, v] : fConstCols) + { + bs << k; + bs << v; + } + bs << static_cast(fDistinct); + bs << fSize; + fRowGroup.serialize(bs); + size = fRowGroup.getColumnCount() * sizeof(int); + bs << size; + bs.append(reinterpret_cast(fMapping.get()), size); + size = 
fOrderCond.size(); + bs << size; + for (const auto& [k, v] : fOrderCond) + { + bs << k; + bs << static_cast(v); + } + bs << fTimeZone; + bs << id; } -GroupConcatAg::~GroupConcatAg() +void GroupConcat::deserialize(messageqcpp::ByteStream& bs) { + fGroupCols.clear(); + fOrderCols.clear(); + fConstCols.clear(); + fOrderCond.clear(); + + RGDataSizeType size; + bs >> size; + fGroupCols.reserve(size); + for (RGDataSizeType i = 0; i < size; ++i) + { + uint32_t f, s; + bs >> f; + bs >> s; + fGroupCols.emplace_back(f, s); + } + bs >> size; + fOrderCols.reserve(size); + for (RGDataSizeType i = 0; i < size; ++i) + { + uint32_t f; + bs >> f; + uint8_t s; + bs >> s; + fOrderCond.emplace_back(f, static_cast(s)); + } + bs >> fSeparator; + bs >> size; + fConstCols.reserve(size); + for (RGDataSizeType i = 0; i < size; ++i) + { + utils::NullString f; + bs >> f; + uint32_t s; + bs >> s; + fConstCols.emplace_back(f, s); + } + uint8_t tmp8; + bs >> tmp8; + fDistinct = tmp8; + bs >> fSize; + fRowGroup.deserialize(bs); + bs >> size; + idbassert(size % sizeof(int) == 0); + fMapping.reset(new int[size / 4]); + memcpy(fMapping.get(), bs.buf(), size); + bs.advance(size); + bs >> size; + fOrderCond.reserve(size); + for (RGDataSizeType i = 0; i < size; ++i) + { + int f; + bs >> f; + uint8_t s; + bs >> s; + fOrderCond.emplace_back(f, static_cast(s)); + } + bs >> fTimeZone; + bs >> id; +} + +RGDataSizeType GroupConcat::getDataSize() const +{ + RGDataSizeType size = 0; + size += fGroupCols.capacity() * 8; + size += fOrderCols.capacity() * 8; + size += fSeparator.capacity(); + size += fConstCols.capacity() * (4 + sizeof(utils::NullString)); + size += fRowGroup.getEmptySize(); + size += fRowGroup.getColumnCount() * sizeof(int); + size += fOrderCols.capacity() * 8; + return size; } } // namespace rowgroup diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index 3c6ac0695..c66795363 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -327,7 +327,7 @@ struct ConstantAggData typedef boost::shared_ptr SP_ROWAGG_GRPBY_t; typedef boost::shared_ptr SP_ROWAGG_FUNC_t; -struct GroupConcat +struct GroupConcat : public messageqcpp::Serializeable { // GROUP_CONCAT(DISTINCT col1, 'const', col2 ORDER BY col3 desc SEPARATOR 'sep') std::vector> fGroupCols; // columns to concatenate, and position @@ -340,38 +340,26 @@ struct GroupConcat RowGroup fRowGroup; std::shared_ptr fMapping; std::vector> fOrderCond; // position to order by [asc/desc] - joblist::ResourceManager* fRm; // resource manager - boost::shared_ptr fSessionMemLimit; long fTimeZone; + uint32_t id; - GroupConcat() : fRm(nullptr) + GroupConcat() = default; + GroupConcat(joblist::ResourceManager* rm, boost::shared_ptr sessLimit) + : fRm(rm) + , fSessionMemLimit(sessLimit) { } + + void serialize(messageqcpp::ByteStream& bs) const override; + void deserialize(messageqcpp::ByteStream& bs) override; + RGDataSizeType getDataSize() const; + + joblist::ResourceManager* fRm{nullptr}; + boost::shared_ptr fSessionMemLimit; }; typedef boost::shared_ptr SP_GroupConcat; -class GroupConcatAg -{ - public: - explicit GroupConcatAg(SP_GroupConcat&); - virtual ~GroupConcatAg(); - - virtual void initialize() {}; - virtual void processRow(const rowgroup::Row&) {}; - virtual void merge(const rowgroup::Row&, uint64_t) {}; - - virtual uint8_t* getResult() - { - return nullptr; - } - - protected: - rowgroup::SP_GroupConcat fGroupConcat; -}; - -typedef boost::shared_ptr SP_GroupConcatAg; - 
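For reference, the GroupConcat serialization added above follows the usual ByteStream pattern: each vector member is written as a count followed by its elements, scalars are written directly, and deserialize reads everything back in the same order. The standalone sketch below illustrates only that length-prefixed round-trip contract; Buffer and ToyGroupConcat are stand-ins invented here, not messageqcpp::ByteStream or the real GroupConcat members.

// Minimal sketch of the length-prefixed encoding used by
// GroupConcat::serialize()/deserialize(): count first, then elements,
// and deserialize() must read in exactly the order serialize() wrote.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>

struct Buffer                              // toy stand-in for a ByteStream
{
  std::vector<uint8_t> bytes;
  size_t rpos = 0;
  template <typename T> void put(const T& v)
  {
    const uint8_t* p = reinterpret_cast<const uint8_t*>(&v);
    bytes.insert(bytes.end(), p, p + sizeof(T));
  }
  template <typename T> void get(T& v)
  {
    std::memcpy(&v, bytes.data() + rpos, sizeof(T));
    rpos += sizeof(T);
  }
};

struct ToyGroupConcat                      // illustrative members only
{
  std::vector<std::pair<uint32_t, uint32_t>> groupCols;
  uint8_t distinct = 0;

  void serialize(Buffer& bs) const
  {
    uint64_t size = groupCols.size();      // count first ...
    bs.put(size);
    for (const auto& [k, v] : groupCols)   // ... then the elements
    {
      bs.put(k);
      bs.put(v);
    }
    bs.put(distinct);                      // scalars written directly
  }
  void deserialize(Buffer& bs)
  {
    groupCols.clear();
    uint64_t size = 0;
    bs.get(size);
    for (uint64_t i = 0; i < size; ++i)
    {
      uint32_t k = 0, v = 0;
      bs.get(k);
      bs.get(v);
      groupCols.emplace_back(k, v);
    }
    bs.get(distinct);
  }
};

int main()
{
  ToyGroupConcat in;
  in.groupCols = {{0, 1}, {2, 3}};
  in.distinct = 1;

  Buffer bs;
  in.serialize(bs);

  ToyGroupConcat out;
  out.deserialize(bs);
  assert(out.groupCols == in.groupCols && out.distinct == in.distinct);
  return 0;
}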
//------------------------------------------------------------------------------ /** @brief Class that aggregates RowGroups. */ @@ -555,6 +543,8 @@ class RowAggregation : public messageqcpp::Serializeable virtual void doAvg(const Row&, int64_t, int64_t, int64_t, bool merge = false); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); void mergeStatistics(const Row&, uint64_t colOut, uint64_t colAux); + void mergeGroupConcat(const Row& rowIn, uint64_t colOut); + virtual void doBitOp(const Row&, int64_t, int64_t, int); virtual void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx, std::vector* rgContextColl = nullptr); @@ -650,6 +640,8 @@ class RowAggregation : public messageqcpp::Serializeable std::string fTmpDir = config::Config::makeConfig()->getTempFileDir(config::Config::TempDirPurpose::Aggregates); std::string fCompStr = config::Config::makeConfig()->getConfig("RowAggregation", "Compression"); + + std::vector fGroupConcat; }; //------------------------------------------------------------------------------ @@ -794,7 +786,6 @@ class RowAggregationUM : public RowAggregation // @bug3362, group_concat virtual void doGroupConcat(const Row&, int64_t, int64_t); - virtual void doJsonAgg(const Row&, int64_t, int64_t); virtual void setGroupConcatString(); bool fHasAvg; @@ -814,8 +805,6 @@ class RowAggregationUM : public RowAggregation std::vector fConstantAggregate; // @bug3362, group_concat - std::vector fGroupConcat; - std::vector fGroupConcatAg; std::vector fFunctionColGc; private: @@ -856,7 +845,6 @@ class RowAggregationUMP2 : public RowAggregationUM void doAvg(const Row&, int64_t, int64_t, int64_t, bool merge = false) override; void doStatistics(const Row&, int64_t, int64_t, int64_t) override; void doGroupConcat(const Row&, int64_t, int64_t) override; - void doJsonAgg(const Row&, int64_t, int64_t) override; void doBitOp(const Row&, int64_t, int64_t, int) override; void doUDAF(const Row&, int64_t, int64_t, int64_t, uint64_t& funcColsIdx, std::vector* rgContextColl = nullptr) override; @@ -964,7 +952,6 @@ class RowAggregationSubDistinct : public RowAggregationUM protected: // virtual methods from RowAggregationUM void doGroupConcat(const Row&, int64_t, int64_t) override; - void doJsonAgg(const Row&, int64_t, int64_t) override; // for groupby columns and the aggregated distinct column Row fDistRow; boost::scoped_array fDistRowData; diff --git a/utils/rowgroup/rowgroup.cpp b/utils/rowgroup/rowgroup.cpp index b031fb221..936fdd59d 100644 --- a/utils/rowgroup/rowgroup.cpp +++ b/utils/rowgroup/rowgroup.cpp @@ -29,6 +29,8 @@ // #define NDEBUG #include #include + +#include "rowaggregation.h" using namespace std; #include @@ -43,6 +45,7 @@ using namespace execplan; #include "rowgroup.h" #include "dataconvert.h" #include "columnwidth.h" +#include "groupconcat.h" namespace rowgroup { @@ -305,6 +308,72 @@ void UserDataStore::deserialize(ByteStream& bs) return; } +void AggregateDataStore::serialize(messageqcpp::ByteStream& bs) const +{ + uint64_t size = fGroupConcat.size(); + bs << size; + for (const auto& gc : fGroupConcat) + { + gc->serialize(bs); + } + size = fData.size(); + bs << size; + for (const auto& gca : fData) + { + bs << gca->getGroupConcatId(); + gca->serialize(bs); + } +} + +void AggregateDataStore::deserialize(messageqcpp::ByteStream& bs) +{ + fGroupConcat.clear(); + fData.clear(); + uint64_t size; + bs >> size; + fGroupConcat.resize(size); + for (uint64_t i = 0; i < size; i++) + { + fGroupConcat[i].reset(new GroupConcat()); + 
fGroupConcat[i]->deserialize(bs); + } + bs >> size; + fData.resize(size); + for (uint64_t i = 0; i < size; i++) + { + uint32_t gc_id; + bs >> gc_id; + idbassert(gc_id < fGroupConcat.size()); + fData[i].reset(new joblist::GroupConcatAg(fGroupConcat[gc_id])); + fData[i]->deserialize(bs); + } +} + +uint32_t AggregateDataStore::storeAggregateData(boost::shared_ptr& data) +{ + fData.emplace_back(data); + return fData.size() - 1; +} + +boost::shared_ptr AggregateDataStore::getAggregateData(uint32_t pos) const +{ + idbassert(pos < fData.size()); + return fData[pos]; +} + +RGDataSizeType AggregateDataStore::getDataSize() const +{ + RGDataSizeType size = 0; + for (const auto& gc : fGroupConcat) + { + size += gc->getDataSize(); + } + for (const auto& gca : fData) + { + size += gca->getDataSize(); + } + return size; +} RGData::RGData(allocators::CountingAllocator& _alloc) : RGData() { @@ -316,29 +385,24 @@ RGData::RGData(const RowGroup& rg, uint32_t rowCount) RGDataSizeType s = rg.getDataSize(rowCount); rowData.reset(new uint8_t[s]); - if (rg.usesStringTable() && rowCount > 0) { + if (rg.usesStringTable() && rowCount > 0) + { strings.reset(new StringStore()); strings->useOnlyLongStrings(rg.usesOnlyLongString()); } + if (rg.usesAggregateDataStore()) + { + aggregateDataStore.reset(new AggregateDataStore(rg.getGroupConcats())); + } + userDataStore.reset(); columnCount = rg.getColumnCount(); rowSize = rg.getRowSize(); } -RGData::RGData(const RowGroup& rg) +RGData::RGData(const RowGroup& rg) : RGData(rg, rgCommonSize) { - rowData.reset(new uint8_t[rg.getMaxDataSize()]); - - if (rg.usesStringTable()) - { - strings.reset(new StringStore()); - strings->useOnlyLongStrings(rg.usesOnlyLongString()); - } - - userDataStore.reset(); - columnCount = rg.getColumnCount(); - rowSize = rg.getRowSize(); } @@ -371,21 +435,28 @@ void RGData::reinit(const RowGroup& rg, uint32_t rowCount) userDataStore.reset(); - if (rg.usesStringTable()) + if (rg.usesStringTable() || rg.usesOnlyLongString()) { if (alloc) { allocators::CountingAllocator ssAlloc = alloc.value(); strings.reset(new StringStore(ssAlloc)); + strings->useOnlyLongStrings(rg.usesOnlyLongString()); } else { strings.reset(new StringStore()); } - } else strings.reset(); + + if (rg.usesAggregateDataStore()) + { + aggregateDataStore.reset(new AggregateDataStore(rg.getGroupConcats())); + } + else + aggregateDataStore.reset(); columnCount = rg.getColumnCount(); rowSize = rg.getRowSize(); } @@ -419,6 +490,14 @@ void RGData::serialize(ByteStream& bs, RGDataSizeType amount) const } else bs << (uint8_t)0; + + if (aggregateDataStore) + { + bs << (uint8_t)1; + aggregateDataStore->serialize(bs); + } + else + bs << (uint8_t)0; } void RGData::deserialize(ByteStream& bs, RGDataSizeType defAmount) @@ -473,6 +552,15 @@ void RGData::deserialize(ByteStream& bs, RGDataSizeType defAmount) } else userDataStore.reset(); + + bs >> tmp8; + if (tmp8) + { + aggregateDataStore.reset(new AggregateDataStore()); + aggregateDataStore->deserialize(bs); + } + else + aggregateDataStore.reset(); } return; @@ -1133,8 +1221,10 @@ RowGroup::RowGroup(const RowGroup& r) , precision(r.precision) , rgData(r.rgData) , strings(r.strings) + , aggregateDataStore(r.aggregateDataStore) , useStringTable(r.useStringTable) , useOnlyLongStrings(r.useOnlyLongStrings) + , useAggregateDataStore(r.useAggregateDataStore) , hasCollation(r.hasCollation) , hasLongStringField(r.hasLongStringField) , sTableThreshold(r.sTableThreshold) @@ -1166,8 +1256,10 @@ RowGroup& RowGroup::operator=(const RowGroup& r) precision = 
r.precision; rgData = r.rgData; strings = r.strings; + aggregateDataStore = r.aggregateDataStore; useStringTable = r.useStringTable; useOnlyLongStrings = r.useOnlyLongStrings; + useAggregateDataStore = r.useAggregateDataStore; hasCollation = r.hasCollation; hasLongStringField = r.hasLongStringField; sTableThreshold = r.sTableThreshold; @@ -1261,6 +1353,25 @@ void RowGroup::deserialize(ByteStream& bs) charsets.insert(charsets.begin(), charsetNumbers.size(), nullptr); } +void RowGroup::setUseAggregateDataStore(bool b, std::span> group_concats) +{ + idbassert(!b || !group_concats.empty()); + if (useAggregateDataStore && !b) + { + fGroupConcats.clear(); + } + else if (b) + { + fGroupConcats.assign(group_concats.begin(), group_concats.end()); + if (rgData) + { + rgData->aggregateDataStore.reset(new AggregateDataStore(fGroupConcats)); + aggregateDataStore = rgData->aggregateDataStore.get(); + } + } + useAggregateDataStore = b; +} + void RowGroup::serializeRGData(ByteStream& bs) const { rgData->serialize(bs, getDataSize()); diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 2401a7253..b726c0550 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -27,6 +27,7 @@ #pragma once +#include #include #include #include @@ -60,6 +61,10 @@ #include "execinfo.h" // Workaround for my_global.h #define of isnan(X) causing a std::std namespace +namespace joblist +{ +class GroupConcatAg; +} namespace rowgroup { @@ -172,8 +177,14 @@ class StringStore { return fUseStoreStringMutex; } - void useOnlyLongStrings(bool b) { fUseOnlyLongStrings = b; } - bool useOnlyLongStrings() const { return fUseOnlyLongStrings; } + void useOnlyLongStrings(bool b) + { + fUseOnlyLongStrings = b; + } + bool useOnlyLongStrings() const + { + return fUseOnlyLongStrings; + } // This is an overlay b/c the underlying data needs to be any size, // and alloc'd in one chunk. data can't be a separate dynamic chunk. @@ -256,6 +267,36 @@ class UserDataStore boost::mutex fMutex; }; +struct GroupConcat; + +class AggregateDataStore +{ + public: + AggregateDataStore() = default; + explicit AggregateDataStore(const std::vector>& groupConcat) + : fGroupConcat(groupConcat) + { + } + ~AggregateDataStore() = default; + AggregateDataStore(const AggregateDataStore&) = delete; + AggregateDataStore(AggregateDataStore&&) = delete; + AggregateDataStore& operator=(const AggregateDataStore&) = delete; + AggregateDataStore& operator=(AggregateDataStore&&) = delete; + + void serialize(messageqcpp::ByteStream&) const; + void deserialize(messageqcpp::ByteStream&); + + uint32_t storeAggregateData(boost::shared_ptr& data); + boost::shared_ptr getAggregateData(uint32_t pos) const; + + RGDataSizeType getDataSize() const; + + private: + friend class RGData; + std::vector> fGroupConcat; + std::vector> fData; +}; + class RowGroup; class Row; @@ -331,6 +372,7 @@ class RGData boost::shared_ptr rowData; boost::shared_ptr strings; std::shared_ptr userDataStore; + std::shared_ptr aggregateDataStore; std::optional> alloc = {}; // Need sig to support backward compat. RGData can deserialize both forms. 
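The design change here replaces the old raw-pointer-in-row scheme: instead of writing a GroupConcatAg* directly into the row's fixed-width data, the row keeps a small integer position while the shared_ptr itself lives in the per-RGData AggregateDataStore, which can then be sized and serialized alongside the row data. A minimal mock of that indirection follows; the Toy* types are invented for illustration and are not the ColumnStore classes.

// Toy illustration of the AggregateDataStore indirection: the row cell holds
// only a uint32_t position; the store owns the shared aggregate objects and
// can account for / serialize them independently of the fixed-width row data.
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

struct ToyAggState            // stand-in for joblist::GroupConcatAg
{
  std::string concatenated;
};

class ToyAggregateDataStore   // stand-in for rowgroup::AggregateDataStore
{
 public:
  uint32_t store(std::shared_ptr<ToyAggState> data)
  {
    fData.emplace_back(std::move(data));
    return static_cast<uint32_t>(fData.size()) - 1;  // position, not pointer
  }
  const std::shared_ptr<ToyAggState>& get(uint32_t pos) const
  {
    assert(pos < fData.size());
    return fData[pos];
  }

 private:
  std::vector<std::shared_ptr<ToyAggState>> fData;
};

int main()
{
  ToyAggregateDataStore store;
  // The uint32_t below is what a row cell would keep in place of a pointer.
  uint32_t rowCell = store.store(std::make_shared<ToyAggState>());

  store.get(rowCell)->concatenated += "a,b,c";  // later lookups go via the store
  assert(store.get(rowCell)->concatenated == "a,b,c");
  return 0;
}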
@@ -356,9 +398,14 @@ class Row inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u) : data(d), strings(s), userDataStore(u) { } + inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u, AggregateDataStore* a) + : data(d), strings(s), userDataStore(u), aggregateDataStore(a) + { + } uint8_t* data = nullptr; StringStore* strings = nullptr; UserDataStore* userDataStore = nullptr; + AggregateDataStore* aggregateDataStore = nullptr; }; Row() = default; @@ -526,6 +573,8 @@ class Row inline boost::shared_ptr getUserData(uint32_t colIndex) const; inline void setUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr userData, uint32_t len, uint32_t colIndex); + inline void setAggregateData(boost::shared_ptr data, uint32_t colIndex); + inline joblist::GroupConcatAg* getAggregateData(uint32_t colIndex) const; uint64_t getNullValue(uint32_t colIndex) const; bool isNullValue(uint32_t colIndex) const; @@ -638,14 +687,15 @@ class Row bool hasLongStringField = false; uint32_t sTableThreshold = 20; std::shared_ptr forceInline; - UserDataStore* userDataStore = nullptr; // For UDAF + UserDataStore* userDataStore = nullptr; // For UDAF + AggregateDataStore* aggregateDataStore = nullptr; // group_concat & json_arrayagg friend class RowGroup; }; inline Row::Pointer Row::getPointer() const { - return Pointer(data, strings, userDataStore); + return Pointer(data, strings, userDataStore, aggregateDataStore); } inline uint8_t* Row::getData() const { @@ -665,6 +715,7 @@ inline void Row::setPointer(const Pointer& p) } userDataStore = p.userDataStore; + aggregateDataStore = p.aggregateDataStore; } inline void Row::setData(const Pointer& p) @@ -1258,7 +1309,7 @@ inline void Row::setUintField(uint64_t val, uint32_t colIndex) template inline void Row::setIntField(int64_t val, uint32_t colIndex) { -// idbassert(getColumnWidth(colIndex) == len); + // idbassert(getColumnWidth(colIndex) == len); switch (len) { case 1: *((int8_t*)&data[offsets[colIndex]]) = val; break; @@ -1362,6 +1413,28 @@ inline void Row::setUserData(mcsv1sdk::mcsv1Context& context, boost::shared_ptr< *((uint32_t*)&data[offsets[colIndex] + 4]) = len; } +inline void Row::setAggregateData(boost::shared_ptr agData, uint32_t colIndex) +{ + if (!aggregateDataStore) + { + throw std::logic_error("Row::getAggregateData: no aggregateDataStore"); + } + + uint32_t pos = aggregateDataStore->storeAggregateData(agData); + *((uint32_t*)&data[offsets[colIndex]]) = pos; +} + +inline joblist::GroupConcatAg* Row::getAggregateData(uint32_t colIndex) const +{ + if (!aggregateDataStore) + { + throw std::logic_error("Row::getAggregateData: no aggregateDataStore"); + } + + uint32_t pos = *((uint32_t*)&data[offsets[colIndex]]); + return aggregateDataStore->getAggregateData(pos).get(); +} + inline void Row::copyField(uint32_t destIndex, uint32_t srcIndex) const { uint32_t n = offsets[destIndex + 1] - offsets[destIndex]; @@ -1559,8 +1632,19 @@ class RowGroup : public messageqcpp::Serializeable inline bool usesStringTable() const; inline void setUseStringTable(bool); - void setUseOnlyLongString(bool b) { useOnlyLongStrings = b; } - bool usesOnlyLongString() const { return useOnlyLongStrings ; } + void setUseOnlyLongString(bool b) + { + useOnlyLongStrings = b; + } + bool usesOnlyLongString() const + { + return useOnlyLongStrings; + } + void setUseAggregateDataStore(bool b, std::span> group_concats = {}); + bool usesAggregateDataStore() const + { + return useAggregateDataStore; + } bool hasLongString() const { @@ -1606,6 +1690,11 @@ class RowGroup : public 
messageqcpp::Serializeable const CHARSET_INFO* getCharset(uint32_t col); + const auto& getGroupConcats() const + { + return fGroupConcats; + } + private: uint32_t columnCount = 0; uint8_t* data = nullptr; @@ -1632,19 +1721,22 @@ class RowGroup : public messageqcpp::Serializeable // string table impl RGData* rgData = nullptr; StringStore* strings = nullptr; // note, strings and data belong to rgData + AggregateDataStore* aggregateDataStore = nullptr; bool useStringTable = true; bool useOnlyLongStrings = false; - bool useAggregateDataStore = true; + bool useAggregateDataStore = false; bool hasCollation = false; bool hasLongStringField = false; uint32_t sTableThreshold = 20; std::shared_ptr forceInline; - static const uint64_t headerSize = 18; - static const uint64_t rowCountOffset = 0; - static const uint64_t baseRidOffset = 4; - static const uint64_t statusOffset = 12; - static const uint64_t dbRootOffset = 14; + std::vector> fGroupConcats; + + static constexpr uint64_t headerSize = 18; + static constexpr uint64_t rowCountOffset = 0; + static constexpr uint64_t baseRidOffset = 4; + static constexpr uint64_t statusOffset = 12; + static constexpr uint64_t dbRootOffset = 14; }; inline uint64_t convertToRid(const uint32_t& partNum, const uint16_t& segNum, const uint8_t& extentNum, @@ -1700,12 +1792,14 @@ inline void RowGroup::getRow(uint32_t rowNum, Row* r) const r->data = &(data[headerSize + (rowNum * r->getSize())]); r->strings = strings; r->userDataStore = rgData->userDataStore.get(); + r->aggregateDataStore = rgData->aggregateDataStore.get(); } inline void RowGroup::setData(RGData* rgd) { data = rgd->rowData.get(); strings = rgd->strings.get(); + aggregateDataStore = rgd->aggregateDataStore.get(); rgData = rgd; } @@ -1792,10 +1886,16 @@ inline uint32_t RowGroup::getRowSizeWithStrings() const inline RGDataSizeType RowGroup::getSizeWithStrings(uint64_t n) const { - if (strings == nullptr) - return getDataSize(n); - else - return getDataSize(n) + strings->getSize(); + RGDataSizeType ret = getDataSize(n); + if (strings) + { + ret += strings->getSize(); + } + if (aggregateDataStore) + { + ret += aggregateDataStore->getDataSize(); + } + return ret; } inline uint64_t RowGroup::getSizeWithStrings() const @@ -2216,7 +2316,18 @@ inline void RGData::getRow(uint32_t num, Row* row) idbassert(columnCount == row->getColumnCount() && rowSize == incomingRowSize); row->setData( - Row::Pointer(&rowData[RowGroup::getHeaderSize() + (num * incomingRowSize)], strings.get(), userDataStore.get())); + Row::Pointer(&rowData[RowGroup::getHeaderSize() + (num * incomingRowSize)], strings.get(), + userDataStore.get(), aggregateDataStore.get())); +} + +inline uint64_t rowGidRidToIdx(uint64_t gid, uint32_t rid, uint32_t maxRows) +{ + return gid * maxRows + rid; +} + +inline std::pair rowIdxToGidRid(uint64_t idx, uint32_t maxRows) +{ + return {idx / maxRows, idx % maxRows}; } } // namespace rowgroup diff --git a/utils/rowgroup/rowstorage.cpp b/utils/rowgroup/rowstorage.cpp index bec3faa65..275386f77 100644 --- a/utils/rowgroup/rowstorage.cpp +++ b/utils/rowgroup/rowstorage.cpp @@ -584,6 +584,7 @@ class RowGroupStorage , fUniqId(this) , fTmpDir(tmpDir) , fCompressor(compressor) + , fUseDisk(!strict) { if (rm) { @@ -698,7 +699,7 @@ class RowGroupStorage logging::ERR_AGGREGATION_TOO_BIG); } - if (fMM->getFree() < memSz * 2) + if (fUseDisk && fMM->getFree() < memSz * 2) { saveRG(rgid); fRGDatas[rgid].reset(); @@ -880,8 +881,7 @@ class RowGroupStorage */ void getRow(uint64_t idx, Row& row) { - uint64_t rgid = idx / fMaxRows; - 
uint64_t rid = idx % fMaxRows; + auto [rgid, rid] = rowIdxToGidRid(idx, fMaxRows); if (UNLIKELY(!fRGDatas[rgid])) { loadRG(rgid); @@ -947,7 +947,7 @@ class RowGroupStorage } fLRU->add(fCurRgid); - idx = fCurRgid * fMaxRows + fRowGroupOut->getRowCount(); + idx = rowGidRidToIdx(fCurRgid, fRowGroupOut->getRowCount(), fMaxRows); fRowGroupOut->getRow(fRowGroupOut->getRowCount(), &row); fRowGroupOut->incRowCount(); } @@ -962,7 +962,7 @@ class RowGroupStorage */ void putKeyRow(uint64_t idx, Row& row) { - uint64_t rgid = idx / fMaxRows; + auto [rgid, rid] = rowIdxToGidRid(idx, fMaxRows); while (rgid >= fRGDatas.size()) { @@ -1157,6 +1157,7 @@ class RowGroupStorage ret->fGeneration = gen; ret->fCompressor = fCompressor; ret->fDumper.reset(new Dumper(fCompressor, fMM.get())); + ret->fUseDisk = fUseDisk; ret->loadFinalizedInfo(); return ret; } @@ -1165,8 +1166,7 @@ class RowGroupStorage */ void markFinalized(uint64_t idx) { - uint64_t gid = idx / 64; - uint64_t rid = idx % 64; + auto [gid, rid] = rowIdxToGidRid(idx, 64); if (LIKELY(fFinalizedRows.size() <= gid)) fFinalizedRows.resize(gid + 1, 0ULL); @@ -1176,8 +1176,7 @@ class RowGroupStorage /** @brief Check if row at specified index was finalized earlier */ bool isFinalized(uint64_t idx) const { - uint64_t gid = idx / 64; - uint64_t rid = idx % 64; + auto [gid, rid] = rowIdxToGidRid(idx, 64); if (LIKELY(fFinalizedRows.size() <= gid)) return false; @@ -1324,6 +1323,7 @@ class RowGroupStorage unlink(fname.c_str()); rgdata.reset(new RGData()); rgdata->deserialize(bs, fRowGroupOut->getDataSize(fMaxRows)); + assert(bs.length() == 0); fRowGroupOut->setData(rgdata.get()); auto memSz = fRowGroupOut->getSizeWithStrings(fMaxRows); @@ -1379,12 +1379,12 @@ class RowGroupStorage fRowGroupOut->serialize(bs); char buf[1024]; - snprintf(buf, sizeof(buf), "/tmp/kemm/META-p%u-t%p", getpid(), fUniqPtr); + snprintf(buf, sizeof(buf), "%s/META-p%u-t%p", fTmpDir.c_str(), getpid(), fUniqId); int fd = open(buf, O_WRONLY | O_TRUNC | O_CREAT, 0644); assert(fd >= 0); auto r = write(fd, bs.buf(), bs.length()); - assert(r == bs.length()); + assert(size_t(r) == bs.length()); close(fd); } #endif @@ -1421,6 +1421,7 @@ class RowGroupStorage std::string fTmpDir; compress::CompressInterface* fCompressor; std::unique_ptr fDumper; + bool fUseDisk; }; /** @brief Internal data for the hashmap */ diff --git a/utils/windowfunction/idborderby.cpp b/utils/windowfunction/idborderby.cpp index 88954a939..b94fe8794 100644 --- a/utils/windowfunction/idborderby.cpp +++ b/utils/windowfunction/idborderby.cpp @@ -526,11 +526,11 @@ int TimeCompare::operator()(IdbCompare* l, Row::Pointer r1, Row::Pointer r2) return ret; } -bool CompareRule::less(Row::Pointer r1, Row::Pointer r2) +bool CompareRule::less(Row::Pointer r1, Row::Pointer r2) const { - for (auto& compare : fCompares) + for (auto* compare : fCompares) { - int c = ((*compare)(fIdbCompare, r1, r2)); + int c = (*compare)(fIdbCompare, r1, r2); if (c < 0) return true; diff --git a/utils/windowfunction/idborderby.h b/utils/windowfunction/idborderby.h index 6616c3aaf..0babe962b 100644 --- a/utils/windowfunction/idborderby.h +++ b/utils/windowfunction/idborderby.h @@ -316,7 +316,7 @@ class CompareRule { } - bool less(rowgroup::Row::Pointer r1, rowgroup::Row::Pointer r2); + bool less(rowgroup::Row::Pointer r1, rowgroup::Row::Pointer r2) const; void compileRules(const std::vector&, const rowgroup::RowGroup&); void revertRules();
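A note on the small index helpers introduced in rowgroup.h above: RowGroupStorage now encodes a row's location as a single 64-bit index, gid * maxRows + rid, and decodes it with the matching division and modulo; the same helper is reused with maxRows = 64 to address bits in the finalized-rows bitmap. The self-contained sketch below copies those formulas verbatim; the maxRows value and the example gid/rid are arbitrary illustrations.

// Standalone copy of the gid/rid packing used by RowGroupStorage: an index is
// gid * maxRows + rid, and decoding is the matching div/mod pair.
#include <cassert>
#include <cstdint>
#include <utility>

inline uint64_t rowGidRidToIdx(uint64_t gid, uint32_t rid, uint32_t maxRows)
{
  return gid * maxRows + rid;
}

inline std::pair<uint64_t, uint64_t> rowIdxToGidRid(uint64_t idx, uint32_t maxRows)
{
  return {idx / maxRows, idx % maxRows};
}

int main()
{
  const uint32_t maxRows = 8192;                  // example rows-per-rowgroup value
  uint64_t idx = rowGidRidToIdx(3, 17, maxRows);  // row 17 of rowgroup 3
  auto [gid, rid] = rowIdxToGidRid(idx, maxRows);
  assert(gid == 3 && rid == 17);

  // Reused with maxRows = 64: locate the word and bit for a row in the
  // finalized-rows bitmap (one uint64_t word per 64 rows).
  auto [word, bit] = rowIdxToGidRid(idx, 64);
  assert(rowGidRidToIdx(word, static_cast<uint32_t>(bit), 64) == idx);
  return 0;
}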