feat(primproc,aggregegation)!: Changes for ROLLUP with single-phase aggregation (#3025)

The fix is simple: enable subtotals in single-phase aggregation and disable parallel processing when there are subtotals and aggregation is single-phase.
2025-07-29 08:21:15 +03:00 · 2023-11-28 17:33:02 +03:00
parent 9a84aa8d99
commit 8632c85ecf
5 changed files with 29 additions and 4 deletions
--- a/dbcon/joblist/tupleaggregatestep.cpp
+++ b/dbcon/joblist/tupleaggregatestep.cpp
@ -1503,7 +1503,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector<RowGroup>&

  RowGroup aggRG(oidsAgg.size(), posAgg, oidsAgg, keysAgg, typeAgg, csNumAgg, scaleAgg, precisionAgg,
                 jobInfo.stringTableThreshold);
-  SP_ROWAGG_UM_t rowAgg(new RowAggregationUM(groupBy, functionVec, jobInfo.rm, jobInfo.umMemLimit, false));
+  SP_ROWAGG_UM_t rowAgg(new RowAggregationUM(groupBy, functionVec, jobInfo.rm, jobInfo.umMemLimit, jobInfo.hasRollup));
  rowAgg->timeZone(jobInfo.timeZone);
  rowgroups.push_back(aggRG);
  aggregators.push_back(rowAgg);
@ -5443,7 +5443,6 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID)
          for (uint32_t i = 0; i < fNumOfBuckets; i++)
          {
            fAggregators[i].reset(fAggregator->clone());
-            fAggregators[i]->clearRollup();
            fAggregators[i]->setInputOutput(fRowGroupIn, &fRowGroupOuts[i]);
          }
        }
@ -5497,7 +5496,19 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID)
              // The key is the groupby columns, which are the leading columns.
              // TBD This approach could potential
              // put all values in on bucket.
-              uint64_t hash = rowgroup::hashRow(rowIn, hashLens[0] - 1);
+	      // The fAggregator->hasRollup() is true when we perform one-phase
+	      // aggregation and also are doing subtotals' computations.
+	      // Subtotals produce new keys whose hash values may not be in
+	      // the processing bucket. Consider case for key tuples (1,2) and (1,3).
+	      // Their subtotals's keys will be (1, NULL) and (1, NULL)
+	      // but they will be left in their processing buckets and never
+	      // gets aggregated properly.
+	      // Due to this, we put all rows into the same bucket 0 when perfoming
+	      // single-phase aggregation with subtotals.
+	      // For all other cases (single-phase without subtotals and two-phase
+	      // aggregation with and without subtotals) fAggregator->hasRollup() is false.
+	      // In these cases we have full parallel processing as expected.
+              uint64_t hash = fAggregator->hasRollup() ? 0 : rowgroup::hashRow(rowIn, hashLens[0] - 1);
              int bucketID = hash % fNumOfBuckets;
              rowBucketVecs[bucketID][0].emplace_back(rowIn.getPointer(), hash);
              rowIn.nextRow();
--- a/mysql-test/columnstore/basic/r/mcs84_rollup.result
+++ b/mysql-test/columnstore/basic/r/mcs84_rollup.result
@ -56,6 +56,15 @@ Paraguay	non-fiction	17790
 Senegal	NULL	171762
 Senegal	fiction	27881
 Senegal	non-fiction	143881
+SELECT country, genre, SUM(sales) FROM (SELECT country, genre, sales FROM booksales) t1 GROUP BY country, genre WITH ROLLUP;
+country	genre	SUM(sales)
+NULL	NULL	354462
+Paraguay	NULL	182700
+Paraguay	fiction	164910
+Paraguay	non-fiction	17790
+Senegal	NULL	171762
+Senegal	fiction	27881
+Senegal	non-fiction	143881
 CREATE TABLE three_cols ( key1 INTEGER, key2 INTEGER, value DECIMAL(38)) ENGINE=COLUMNSTORE;
 INSERT INTO three_cols(key1, key2, value) VALUES
 (NULL, NULL, NULL)
--- a/mysql-test/columnstore/basic/t/mcs84_rollup.test
+++ b/mysql-test/columnstore/basic/t/mcs84_rollup.test
@ -42,6 +42,9 @@ SELECT year, SUM(sales) FROM booksales GROUP BY year WITH ROLLUP;
 --sorted_result
 SELECT country, genre, SUM(sales) FROM booksales GROUP BY country, genre WITH ROLLUP;

+--sorted_result
+SELECT country, genre, SUM(sales) FROM (SELECT country, genre, sales FROM booksales) t1 GROUP BY country, genre WITH ROLLUP;
+
 CREATE TABLE three_cols ( key1 INTEGER, key2 INTEGER, value DECIMAL(38)) ENGINE=COLUMNSTORE;

 INSERT INTO three_cols(key1, key2, value) VALUES
--- a/utils/common/string_prefixes.cpp
+++ b/utils/common/string_prefixes.cpp
@ -42,7 +42,7 @@ int64_t encodeStringPrefix(const uint8_t* str, size_t len, datatypes::Charset& c

 int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, datatypes::Charset& cset)
 {
-  if (len < 1 && str == nullptr)
+  if (len < 1)
  {
    return joblist::UBIGINTNULL;
  }
--- a/utils/rowgroup/rowaggregation.h
+++ b/utils/rowgroup/rowaggregation.h
@ -425,6 +425,8 @@ class RowAggregation : public messageqcpp::Serializeable

  void clearRollup() { fRollupFlag = false; }

+  bool hasRollup() const { return fRollupFlag; }
+
  /** @brief Define content of data to be joined
   *
   *    This method must be call after setInputOutput() for PM hashjoin case.