diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index e708d973d..c234a7271 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -1503,7 +1503,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector& RowGroup aggRG(oidsAgg.size(), posAgg, oidsAgg, keysAgg, typeAgg, csNumAgg, scaleAgg, precisionAgg, jobInfo.stringTableThreshold); - SP_ROWAGG_UM_t rowAgg(new RowAggregationUM(groupBy, functionVec, jobInfo.rm, jobInfo.umMemLimit, false)); + SP_ROWAGG_UM_t rowAgg(new RowAggregationUM(groupBy, functionVec, jobInfo.rm, jobInfo.umMemLimit, jobInfo.hasRollup)); rowAgg->timeZone(jobInfo.timeZone); rowgroups.push_back(aggRG); aggregators.push_back(rowAgg); @@ -5443,7 +5443,6 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID) for (uint32_t i = 0; i < fNumOfBuckets; i++) { fAggregators[i].reset(fAggregator->clone()); - fAggregators[i]->clearRollup(); fAggregators[i]->setInputOutput(fRowGroupIn, &fRowGroupOuts[i]); } } @@ -5497,7 +5496,19 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID) // The key is the groupby columns, which are the leading columns. // TBD This approach could potential // put all values in on bucket. - uint64_t hash = rowgroup::hashRow(rowIn, hashLens[0] - 1); + // The fAggregator->hasRollup() is true when we perform one-phase + // aggregation and also are doing subtotals' computations. + // Subtotals produce new keys whose hash values may not be in + // the processing bucket. Consider case for key tuples (1,2) and (1,3). + // Their subtotals's keys will be (1, NULL) and (1, NULL) + // but they will be left in their processing buckets and never + // gets aggregated properly. + // Due to this, we put all rows into the same bucket 0 when perfoming + // single-phase aggregation with subtotals. + // For all other cases (single-phase without subtotals and two-phase + // aggregation with and without subtotals) fAggregator->hasRollup() is false. + // In these cases we have full parallel processing as expected. + uint64_t hash = fAggregator->hasRollup() ? 0 : rowgroup::hashRow(rowIn, hashLens[0] - 1); int bucketID = hash % fNumOfBuckets; rowBucketVecs[bucketID][0].emplace_back(rowIn.getPointer(), hash); rowIn.nextRow(); diff --git a/mysql-test/columnstore/basic/r/mcs84_rollup.result b/mysql-test/columnstore/basic/r/mcs84_rollup.result index fade3b1b2..d261c0acd 100644 --- a/mysql-test/columnstore/basic/r/mcs84_rollup.result +++ b/mysql-test/columnstore/basic/r/mcs84_rollup.result @@ -56,6 +56,15 @@ Paraguay non-fiction 17790 Senegal NULL 171762 Senegal fiction 27881 Senegal non-fiction 143881 +SELECT country, genre, SUM(sales) FROM (SELECT country, genre, sales FROM booksales) t1 GROUP BY country, genre WITH ROLLUP; +country genre SUM(sales) +NULL NULL 354462 +Paraguay NULL 182700 +Paraguay fiction 164910 +Paraguay non-fiction 17790 +Senegal NULL 171762 +Senegal fiction 27881 +Senegal non-fiction 143881 CREATE TABLE three_cols ( key1 INTEGER, key2 INTEGER, value DECIMAL(38)) ENGINE=COLUMNSTORE; INSERT INTO three_cols(key1, key2, value) VALUES (NULL, NULL, NULL) diff --git a/mysql-test/columnstore/basic/t/mcs84_rollup.test b/mysql-test/columnstore/basic/t/mcs84_rollup.test index e7d445e2c..58093ddfc 100755 --- a/mysql-test/columnstore/basic/t/mcs84_rollup.test +++ b/mysql-test/columnstore/basic/t/mcs84_rollup.test @@ -42,6 +42,9 @@ SELECT year, SUM(sales) FROM booksales GROUP BY year WITH ROLLUP; --sorted_result SELECT country, genre, SUM(sales) FROM booksales GROUP BY country, genre WITH ROLLUP; +--sorted_result +SELECT country, genre, SUM(sales) FROM (SELECT country, genre, sales FROM booksales) t1 GROUP BY country, genre WITH ROLLUP; + CREATE TABLE three_cols ( key1 INTEGER, key2 INTEGER, value DECIMAL(38)) ENGINE=COLUMNSTORE; INSERT INTO three_cols(key1, key2, value) VALUES diff --git a/utils/common/string_prefixes.cpp b/utils/common/string_prefixes.cpp index 07748f05f..8b73a6f92 100644 --- a/utils/common/string_prefixes.cpp +++ b/utils/common/string_prefixes.cpp @@ -42,7 +42,7 @@ int64_t encodeStringPrefix(const uint8_t* str, size_t len, datatypes::Charset& c int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, datatypes::Charset& cset) { - if (len < 1 && str == nullptr) + if (len < 1) { return joblist::UBIGINTNULL; } diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index 6317fc0ab..8264642f4 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -425,6 +425,8 @@ class RowAggregation : public messageqcpp::Serializeable void clearRollup() { fRollupFlag = false; } + bool hasRollup() const { return fRollupFlag; } + /** @brief Define content of data to be joined * * This method must be call after setInputOutput() for PM hashjoin case.