diff --git a/dbcon/joblist/groupconcat.cpp b/dbcon/joblist/groupconcat.cpp index 57f452458..42725c6c1 100644 --- a/dbcon/joblist/groupconcat.cpp +++ b/dbcon/joblist/groupconcat.cpp @@ -267,6 +267,14 @@ void GroupConcatInfo::mapColumns(const RowGroup& projRG) (*k)->fRowGroup = RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision, projRG.getStringTableThreshold(), false); + + // MCOL-5429 Use stringstore if the datatype of the groupconcat + // field is a long string. + if ((*k)->fRowGroup.hasLongString()) + { + (*k)->fRowGroup.setUseStringTable(true); + } + (*k)->fMapping = makeMapping(projRG, (*k)->fRowGroup); } } @@ -318,10 +326,24 @@ void GroupConcatAgUM::initialize() fConcator->initialize(fGroupConcat); - fGroupConcat->fRowGroup.initRow(&fRow, true); - fData.reset(new uint8_t[fRow.getSize()]); - - fRow.setData(rowgroup::Row::Pointer(fData.get())); + // MCOL-5429 Use stringstore if the datatype of the groupconcat + // field is a long string. + if (fGroupConcat->fRowGroup.hasLongString()) + { + fRowGroup = fGroupConcat->fRowGroup; + fRowGroup.setUseStringTable(true); + fRowRGData.reinit(fRowGroup, 1); + fRowGroup.setData(&fRowRGData); + fRowGroup.resetRowGroup(0); + fRowGroup.initRow(&fRow); + fRowGroup.getRow(0, &fRow); + } + else + { + fGroupConcat->fRowGroup.initRow(&fRow, true); + fData.reset(new uint8_t[fRow.getSize()]); + fRow.setData(rowgroup::Row::Pointer(fData.get())); + } } void GroupConcatAgUM::processRow(const rowgroup::Row& inRow) @@ -397,7 +419,7 @@ GroupConcator::~GroupConcator() void GroupConcator::initialize(const rowgroup::SP_GroupConcat& gcc) { // MCOL-901 This value comes from the Server and it is - // too high(3MB) to allocate it for every instance. + // too high(1MB or 3MB by default) to allocate it for every instance. fGroupConcatLen = gcc->fSize; fCurrentLength -= strlen(gcc->fSeparator.c_str()); fTimeZone = gcc->fTimeZone; diff --git a/dbcon/joblist/groupconcat.h b/dbcon/joblist/groupconcat.h index b36a78d08..5d6174d78 100644 --- a/dbcon/joblist/groupconcat.h +++ b/dbcon/joblist/groupconcat.h @@ -93,6 +93,8 @@ class GroupConcatAgUM : public rowgroup::GroupConcatAg boost::scoped_ptr fConcator; boost::scoped_array fData; rowgroup::Row fRow; + rowgroup::RGData fRowRGData; + rowgroup::RowGroup fRowGroup; bool fNoOrder; }; diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 6d0b9a8bb..595220ab2 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -5349,7 +5349,24 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) // Item_func_group_concat* gc = (Item_func_group_concat*)isp; CalpontSystemCatalog::ColType ct; ct.colDataType = CalpontSystemCatalog::VARCHAR; - ct.colWidth = isp->max_length; + + // MCOL-5429 CalpontSystemCatalog::ColType::colWidth is currently + // stored as an int32_t (see calpontsystemcatalog.h). However, + // Item_sum::max_length is an uint32_t. This means there will be an + // integer overflow when Item_sum::max_length > colWidth. This ultimately + // causes an array index out of bound in GroupConcator::swapStreamWithStringAndReturnBuf() + // in groupconcat.cpp when ExeMgr processes groupconcat. As a temporary + // fix, we cap off the max groupconcat length to INT32_MAX. The proper fix + // would be to change colWidth type to uint32_t. + if (isp->max_length <= INT32_MAX) + { + ct.colWidth = isp->max_length; + } + else + { + ct.colWidth = INT32_MAX; + } + ct.precision = 0; ac->resultType(ct); } diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index a56ea3128..85d1babae 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -649,7 +649,7 @@ void RowAggregation::resetUDAF(RowUDAFFunctionCol* rowUDAF, uint64_t funcColsIdx // Initilalize the data members to meaningful values, setup the hashmap. // The fRowGroupOut must have a valid data pointer before this. //------------------------------------------------------------------------------ -void RowAggregation::initialize() +void RowAggregation::initialize(bool hasGroupConcat) { // Calculate the length of the hashmap key. fAggMapKeyCount = fGroupByCols.size(); @@ -687,9 +687,25 @@ void RowAggregation::initialize() makeAggFieldsNull(fRow); // Keep a copy of the null row to initialize new map entries. - fRowGroupOut->initRow(&fNullRow, true); - fNullRowData.reset(new uint8_t[fNullRow.getSize()]); - fNullRow.setData(rowgroup::Row::Pointer(fNullRowData.get())); + // MCOL-5429 Use stringstore if the datatype of the groupconcat + // field is a long string. + if (hasGroupConcat && fRowGroupOut->hasLongString()) + { + fNullRowGroup = *fRowGroupOut; + fNullRowGroup.setUseStringTable(true); + fNullRowRGData.reinit(fNullRowGroup, 1); + fNullRowGroup.setData(&fNullRowRGData); + fNullRowGroup.resetRowGroup(0); + fNullRowGroup.initRow(&fNullRow); + fNullRowGroup.getRow(0, &fNullRow); + } + else + { + fRowGroupOut->initRow(&fNullRow, true); + fNullRowData.reset(new uint8_t[fNullRow.getSize()]); + fNullRow.setData(rowgroup::Row::Pointer(fNullRowData.get())); + } + copyRow(fRow, &fNullRow); // Lazy approach w/o a mapping b/w fFunctionCols idx and fRGContextColl idx @@ -2407,7 +2423,7 @@ void RowAggregationUM::endOfInput() //------------------------------------------------------------------------------ // Initilalize the Group Concat data //------------------------------------------------------------------------------ -void RowAggregationUM::initialize() +void RowAggregationUM::initialize(bool hasGroupConcat) { if (fGroupConcat.size() > 0) fFunctionColGc = fFunctionCols; @@ -2417,7 +2433,7 @@ void RowAggregationUM::initialize() fKeyRG = fRowGroupIn.truncate(fGroupByCols.size()); } - RowAggregation::initialize(); + RowAggregation::initialize(fGroupConcat.size() > 0); } //------------------------------------------------------------------------------ diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index 12b4d4acc..7406279a7 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -526,7 +526,7 @@ class RowAggregation : public messageqcpp::Serializeable } protected: - virtual void initialize(); + virtual void initialize(bool hasGroupConcat = false); virtual void initMapData(const Row& row); virtual void attachGroupConcatAg(); @@ -577,6 +577,8 @@ class RowAggregation : public messageqcpp::Serializeable Row fNullRow; Row* tmpRow; // used by the hashers & eq functors boost::scoped_array fNullRowData; + rowgroup::RGData fNullRowRGData; + rowgroup::RowGroup fNullRowGroup; std::unique_ptr fRowAggStorage; @@ -721,7 +723,7 @@ class RowAggregationUM : public RowAggregation protected: // virtual methods from base - void initialize() override; + void initialize(bool hasGroupConcat = false) override; void attachGroupConcatAg() override; void updateEntry(const Row& row, std::vector* rgContextColl = nullptr) override; diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 6fdffa761..f72517fd8 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -1484,6 +1484,11 @@ class RowGroup : public messageqcpp::Serializeable inline bool usesStringTable() const; inline void setUseStringTable(bool); + bool hasLongString() const + { + return hasLongStringField; + } + void serializeRGData(messageqcpp::ByteStream&) const; inline uint32_t getStringTableThreshold() const;