MDEV-30660 Aggregation functions fail to leverage uniqueness property

When executing a statement of the form SELECT AGGR_FN(DISTINCT c1, c2,..,cn) FROM t1, where AGGR_FN is an aggregate function such as COUNT(), AVG() or SUM(), and a unique index exists on table t1 covering some or all of the columns (c1, c2,..,cn), the retrieved values are inherently unique. Consequently, the need for de-duplication imposed by the DISTINCT clause can be eliminated, leading to optimization of aggregation operations. This optimization applies under the following conditions: - only one table involved in the join (not counting const tables) - some arguments of the aggregate function are fields (not functions/subqueries) This optimization extends to queries of the form SELECT AGGR_FN(c1, c2,..,cn) GROUP BY cx,..cy when a unique index covers some or all of the columns (c1, c2,..cn, cx,..cy)
2025-07-30 16:24:05 +03:00 · 2024-01-18 16:18:08 +07:00
parent 0381921e26
commit 15623c7f29
6 changed files with 545 additions and 8 deletions
--- a/mysql-test/main/distinct_notembedded.result
+++ b/mysql-test/main/distinct_notembedded.result
@ -0,0 +1,322 @@
 #
 # MDEV-30660 COUNT DISTINCT seems unnecessarily slow when run on a PK
 #
 set @save_optimizer_trace = @@optimizer_trace;
 SET optimizer_trace='enabled=on';
 CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b INT NOT NULL);
 INSERT INTO t1 VALUES (1,1), (2,1), (3,1);
 # Optimization is applied (aggregator=simple):
 SELECT COUNT(DISTINCT a) FROM t1;
 COUNT(DISTINCT a)
 3
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct t1.a)",
        "aggregator_type": "simple"
    }
 ]
 SELECT AVG(DISTINCT a), SUM(DISTINCT b) FROM t1;
 AVG(DISTINCT a)	SUM(DISTINCT b)
 2.0000	1
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "avg(distinct t1.a)",
        "aggregator_type": "simple"
    },
    {
        "function": "sum(distinct t1.b)",
        "aggregator_type": "distinct"
    }
 ]
 # Only `a` is unique but it's enough to eliminate DISTINCT:
 SELECT COUNT(DISTINCT b, a) FROM t1;
 COUNT(DISTINCT b, a)
 3
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct t1.b,t1.a)",
        "aggregator_type": "simple"
    }
 ]
 SELECT COUNT(DISTINCT a, a + b) FROM t1;
 COUNT(DISTINCT a, a + b)
 3
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct t1.a,t1.a + t1.b)",
        "aggregator_type": "simple"
    }
 ]
 SELECT SUM(DISTINCT a), AVG(DISTINCT a), COUNT(DISTINCT a) FROM t1 WHERE a > 1;
 SUM(DISTINCT a)	AVG(DISTINCT a)	COUNT(DISTINCT a)
 5	2.5000	2
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "sum(distinct t1.a)",
        "aggregator_type": "simple"
    },
    {
        "function": "avg(distinct t1.a)",
        "aggregator_type": "simple"
    },
    {
        "function": "count(distinct t1.a)",
        "aggregator_type": "simple"
    }
 ]
 # Optimization is not applied 'cause function argument is not a field
 # (aggregator=distinct):
 SELECT SUM(DISTINCT a + b) FROM t1;
 SUM(DISTINCT a + b)
 9
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "sum(distinct t1.a + t1.b)",
        "aggregator_type": "distinct"
    }
 ]
 SELECT COUNT(DISTINCT b) FROM t1;
 COUNT(DISTINCT b)
 1
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct t1.b)",
        "aggregator_type": "distinct"
    }
 ]
 SELECT AVG(DISTINCT b / a) FROM t1;
 AVG(DISTINCT b / a)
 0.61110000
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "avg(distinct t1.b / t1.a)",
        "aggregator_type": "distinct"
    }
 ]
 EXPLAIN SELECT COUNT(DISTINCT (SELECT a)) FROM t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	index	NULL	PRIMARY	4	NULL	3	Using index
 2	DEPENDENT SUBQUERY	NULL	NULL	NULL	NULL	NULL	NULL	NULL	No tables used
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct (/* select#2 */ select t1.a))",
        "aggregator_type": "distinct"
    }
 ]
 CREATE TABLE t2 (a INT);
 INSERT INTO t2 VALUES (1), (2);
 # Optimization is not applied 'cause there is more than one table
 SELECT COUNT(DISTINCT t1.a) FROM t1, t2;
 COUNT(DISTINCT t1.a)
 3
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct t1.a)",
        "aggregator_type": "distinct"
    }
 ]
 SELECT AVG(DISTINCT t1.a) FROM t1, t2;
 AVG(DISTINCT t1.a)
 2.0000
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "avg(distinct t1.a)",
        "aggregator_type": "distinct"
    }
 ]
 # Const tables, optimization is applied
 SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1) AS t2;
 COUNT(DISTINCT a)
 3
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct t1.a)",
        "aggregator_type": "simple"
    }
 ]
 SELECT AVG(DISTINCT t1.a) FROM (SELECT 1 AS a) AS t2, t1, (SELECT 2 AS a) AS t3;
 AVG(DISTINCT t1.a)
 2.0000
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "avg(distinct t1.a)",
        "aggregator_type": "simple"
    }
 ]
 SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1 UNION SELECT 2) AS t2;
 COUNT(DISTINCT a)
 3
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct t1.a)",
        "aggregator_type": "distinct"
    }
 ]
 # Unique index on two columns
 CREATE TABLE t3 (a INT NOT NULL, b INT NOT NULL);
 INSERT INTO t3 VALUES (1,1), (1,2), (1,3), (2,1), (2,2), (3,1), (3,2);
 CREATE UNIQUE INDEX t3_a_b ON t3 (a, b);
 # Optimization is applied:
 SELECT COUNT(DISTINCT a, b) FROM t3;
 COUNT(DISTINCT a, b)
 7
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct t3.a,t3.b)",
        "aggregator_type": "simple"
    }
 ]
 SELECT COUNT(DISTINCT b, a) FROM t3;
 COUNT(DISTINCT b, a)
 7
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct t3.b,t3.a)",
        "aggregator_type": "simple"
    }
 ]
 SELECT COUNT(DISTINCT b, a) FROM t3 WHERE a < 3;
 COUNT(DISTINCT b, a)
 5
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct t3.b,t3.a)",
        "aggregator_type": "simple"
    }
 ]
 # Optimization is applied to one of the functions:
 SELECT COUNT(DISTINCT b), SUM(DISTINCT a), SUM(DISTINCT a + b) FROM t3 GROUP BY a;
 COUNT(DISTINCT b)	SUM(DISTINCT a)	SUM(DISTINCT a + b)
 3	1	9
 2	2	7
 2	3	9
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct t3.b)",
        "aggregator_type": "simple"
    },
    {
        "function": "sum(distinct t3.a)",
        "aggregator_type": "distinct"
    },
    {
        "function": "sum(distinct t3.a + t3.b)",
        "aggregator_type": "distinct"
    }
 ]
 # Can't apply optimization 'cause GROUP BY argument is not a field:
 SELECT COUNT(DISTINCT b) FROM t3 GROUP BY a+b;
 COUNT(DISTINCT b)
 1
 2
 3
 1
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct t3.b)",
        "aggregator_type": "distinct"
    }
 ]
 # Test merged view
 CREATE VIEW v1 AS SELECT * FROM t1;
 # Optimization is applied
 SELECT COUNT(DISTINCT a, b) FROM v1;
 COUNT(DISTINCT a, b)
 3
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "count(distinct t1.a,t1.b)",
        "aggregator_type": "simple"
    }
 ]
 # GROUP_CONCAT implements non-standard distinct aggregator
 SELECT GROUP_CONCAT(b) FROM t1;
 GROUP_CONCAT(b)
 1,1,1
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "group_concat(t1.b separator ',')",
        "aggregator_type": "simple"
    }
 ]
 SELECT GROUP_CONCAT(DISTINCT b) FROM t1;
 GROUP_CONCAT(DISTINCT b)
 1
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 JS
 [
    {
        "function": "group_concat(distinct t1.b separator ',')",
        "aggregator_type": "distinct"
    }
 ]
 DROP TABLE t1, t2, t3;
 DROP VIEW v1;
 SET optimizer_trace = @save_optimizer_trace;
 #
 # end of 10.5 tests
 #
--- a/mysql-test/main/distinct_notembedded.test
+++ b/mysql-test/main/distinct_notembedded.test
@ -0,0 +1,109 @@
 # Embedded doesn't have optimizer trace:
 --source include/not_embedded.inc
 --source include/have_sequence.inc
 --echo #
 --echo # MDEV-30660 COUNT DISTINCT seems unnecessarily slow when run on a PK
 --echo #
 set @save_optimizer_trace = @@optimizer_trace;
 SET optimizer_trace='enabled=on';
 let $trace=
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '\$**.prepare_sum_aggregators')) AS JS
  FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b INT NOT NULL);
 INSERT INTO t1 VALUES (1,1), (2,1), (3,1);
 --echo # Optimization is applied (aggregator=simple):
 SELECT COUNT(DISTINCT a) FROM t1;
 eval $trace;
 SELECT AVG(DISTINCT a), SUM(DISTINCT b) FROM t1;
 eval $trace;
 --echo # Only `a` is unique but it's enough to eliminate DISTINCT:
 SELECT COUNT(DISTINCT b, a) FROM t1;
 eval $trace;
 SELECT COUNT(DISTINCT a, a + b) FROM t1;
 eval $trace;
 SELECT SUM(DISTINCT a), AVG(DISTINCT a), COUNT(DISTINCT a) FROM t1 WHERE a > 1;
 eval $trace;
 --echo # Optimization is not applied 'cause function argument is not a field
 --echo # (aggregator=distinct):
 SELECT SUM(DISTINCT a + b) FROM t1;
 eval $trace;
 SELECT COUNT(DISTINCT b) FROM t1;
 eval $trace;
 SELECT AVG(DISTINCT b / a) FROM t1;
 eval $trace;
 EXPLAIN SELECT COUNT(DISTINCT (SELECT a)) FROM t1;
 eval $trace;
 CREATE TABLE t2 (a INT);
 INSERT INTO t2 VALUES (1), (2);
 --echo # Optimization is not applied 'cause there is more than one table
 SELECT COUNT(DISTINCT t1.a) FROM t1, t2;
 eval $trace;
 SELECT AVG(DISTINCT t1.a) FROM t1, t2;
 eval $trace;
 --echo # Const tables, optimization is applied
 SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1) AS t2;
 eval $trace;
 SELECT AVG(DISTINCT t1.a) FROM (SELECT 1 AS a) AS t2, t1, (SELECT 2 AS a) AS t3;
 eval $trace;
 SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1 UNION SELECT 2) AS t2;
 eval $trace;
 --echo # Unique index on two columns
 CREATE TABLE t3 (a INT NOT NULL, b INT NOT NULL);
 INSERT INTO t3 VALUES (1,1), (1,2), (1,3), (2,1), (2,2), (3,1), (3,2);
 CREATE UNIQUE INDEX t3_a_b ON t3 (a, b);
 --echo # Optimization is applied:
 SELECT COUNT(DISTINCT a, b) FROM t3;
 eval $trace;
 SELECT COUNT(DISTINCT b, a) FROM t3;
 eval $trace;
 SELECT COUNT(DISTINCT b, a) FROM t3 WHERE a < 3;
 eval $trace;
 --echo # Optimization is applied to one of the functions:
 SELECT COUNT(DISTINCT b), SUM(DISTINCT a), SUM(DISTINCT a + b) FROM t3 GROUP BY a;
 eval $trace;
 --echo # Can't apply optimization 'cause GROUP BY argument is not a field:
 SELECT COUNT(DISTINCT b) FROM t3 GROUP BY a+b;
 eval $trace;
 --echo # Test merged view
 CREATE VIEW v1 AS SELECT * FROM t1;
 --echo # Optimization is applied
 SELECT COUNT(DISTINCT a, b) FROM v1;
 eval $trace;
 --echo # GROUP_CONCAT implements non-standard distinct aggregator
 SELECT GROUP_CONCAT(b) FROM t1;
 eval $trace;
 SELECT GROUP_CONCAT(DISTINCT b) FROM t1;
 eval $trace;
 DROP TABLE t1, t2, t3;
 DROP VIEW v1;
 SET optimizer_trace = @save_optimizer_trace;
 --echo #
 --echo # end of 10.5 tests
 --echo #
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@ -1496,6 +1496,12 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3  group by a	{
          },
          {
            "test_if_skip_sort_order": []
          },
          {
            "prepare_sum_aggregators": {
              "function": "min(t1.d)",
              "aggregator_type": "simple"
            }
          }
        ]
      }
@ -1693,6 +1699,18 @@ EXPLAIN SELECT id,MIN(a),MAX(a) FROM t1 WHERE a>=20010104e0 GROUP BY id	{
          },
          {
            "test_if_skip_sort_order": []
          },
          {
            "prepare_sum_aggregators": {
              "function": "min(t1.a)",
              "aggregator_type": "simple"
            }
          },
          {
            "prepare_sum_aggregators": {
              "function": "max(t1.a)",
              "aggregator_type": "simple"
            }
          }
        ]
      }
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@ -597,6 +597,17 @@ public:
  bool with_sum_func() const { return true; }
  virtual void set_partition_row_count(ulonglong count) { DBUG_ASSERT(0); }
  /*
    While most Item_sum descendants employ standard aggregators configured
    through Item_sum::set_aggregator() call, there are exceptions like
    Item_func_group_concat, which implements its own custom aggregators for
    deduplication values.
    This function distinguishes between the use of standard and custom
    aggregators by the object
  */
  virtual bool uses_non_standard_aggregator_for_distinct() const
  { return false; }
 };
@ -1952,6 +1963,9 @@ protected:
    { return f->val_str(tmp, key + offset); }
  virtual void cut_max_length(String *result,
                              uint old_length, uint max_length) const;
  bool uses_non_standard_aggregator_for_distinct() const override
    { return distinct; }
 public:
  // Methods used by ColumnStore
  bool get_distinct() const { return distinct; }
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@ -278,7 +278,6 @@ static void update_tmptable_sum_func(Item_sum **func,TABLE *tmp_table);
 static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end);
 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
 static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr);
 static bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct);
 static bool init_sum_functions(Item_sum **func, Item_sum **end);
 static bool update_sum_func(Item_sum **func);
 static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
@ -3656,7 +3655,7 @@ bool JOIN::make_aggr_tables_info()
      {
        if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
          DBUG_RETURN(true);
-        if (prepare_sum_aggregators(sum_funcs,
+        if (prepare_sum_aggregators(thd, sum_funcs,
                                    !join_tab->is_using_agg_loose_index_scan()))
          DBUG_RETURN(true);
        group_list= NULL;
@ -3766,7 +3765,7 @@ bool JOIN::make_aggr_tables_info()
    }
    if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
      DBUG_RETURN(true);
-    if (prepare_sum_aggregators(sum_funcs,
+    if (prepare_sum_aggregators(thd, sum_funcs,
                                !join_tab ||
                                !join_tab-> is_using_agg_loose_index_scan()))
      DBUG_RETURN(true);
@ -3947,8 +3946,8 @@ JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
      goto err;
    if (make_sum_func_list(all_fields, fields_list, true))
      goto err;
-    if (prepare_sum_aggregators(sum_funcs,
+    if (prepare_sum_aggregators(thd, sum_funcs,
-                                !(tables_list && 
+                                !(tables_list &&
                                  join_tab->is_using_agg_loose_index_scan())))
      goto err;
    if (setup_sum_funcs(thd, sum_funcs))
@ -3957,7 +3956,7 @@ JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
  }
  else
  {
-    if (prepare_sum_aggregators(sum_funcs,
+    if (prepare_sum_aggregators(thd, sum_funcs,
                                !join_tab->is_using_agg_loose_index_scan()))
      goto err;
    if (setup_sum_funcs(thd, sum_funcs))
@ -26406,13 +26405,86 @@ static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr)
 }
-static bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct)
+/*
  @brief
    Setup aggregate functions.
  @param thd            Thread descriptor
  @param func_ptr       Array of pointers to aggregate functions
  @param need_distinct  FALSE means that the table access method already
                        guarantees that arguments of all aggregate functions
                        will be unique. (This is the case for Loose Scan)
                        TRUE - Otherwise.
  @return
     false  Ok
     true   Error
 */
 bool JOIN::prepare_sum_aggregators(THD *thd, Item_sum **func_ptr,
                                   bool need_distinct)
 {
  Item_sum *func;
  DBUG_ENTER("prepare_sum_aggregators");
  while ((func= *(func_ptr++)))
  {
-    if (func->set_aggregator(need_distinct && func->has_with_distinct() ?
+    bool need_distinct_aggregator= need_distinct && func->has_with_distinct();
    if (need_distinct_aggregator && table_count - const_tables == 1)
    {
      /*
        We are doing setup for an aggregate with DISTINCT, like
          SELECT agg_func(DISTINCT col1, col2 ...) FROM ...
        In general case, agg_func will need to use Aggregator_distinct to
        remove duplicates from its arguments.
        We won't have to remove duplicates if we know the arguments are already
        unique. This is true when
        1. the join operation has only one non-const table (checked above)
        2. the argument list covers a PRIMARY or a UNIQUE index.
        Example: here the values of t1.pk are unique:
          SELECT agg_func(DISTINCT t1.pk, ...) FROM t1
        and so the whole argument of agg_func is unique.
      */
      List<Item> arg_fields;
      for (uint i= 0; i < func->argument_count(); i++)
      {
        if (func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM)
          arg_fields.push_back(func->arguments()[i]);
      }
      /*
        If the query has a GROUP BY, then it's sufficient that a unique
        key is covered by a concatenation of {argument_list, group_by_list}.
        Example: Suppose t1 has PRIMARY KEY(pk1, pk2). Then:
          SELECT agg_func(DISTINCT t1.pk1, ...) FROM t1 GROUP BY t1.pk2
        Each GROUP BY group will have t1.pk2 fixed. Then, the values of t1.pk1
        will be unique, and no de-duplication will be needed.
      */
      for (ORDER *group= group_list; group ; group= group->next)
      {
        if ((*group->item)->real_item()->type() == Item::FIELD_ITEM)
          arg_fields.push_back(*group->item);
      }
      if (list_contains_unique_index(join_tab[const_tables].table,
                                     find_field_in_item_list,
                                     (void *) &arg_fields))
        need_distinct_aggregator= false;
    }
    Json_writer_object trace_wrapper(thd);
    Json_writer_object trace_aggr(thd, "prepare_sum_aggregators");
    trace_aggr.add("function", func);
    trace_aggr.add("aggregator_type",
                   (need_distinct_aggregator ||
                    func->uses_non_standard_aggregator_for_distinct()) ?
                      "distinct" : "simple");
    if (func->set_aggregator(need_distinct_aggregator ?
                             Aggregator::DISTINCT_AGGREGATOR :
                             Aggregator::SIMPLE_AGGREGATOR))
      DBUG_RETURN(TRUE);
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@ -1905,6 +1905,8 @@ private:
  bool add_fields_for_current_rowid(JOIN_TAB *cur, List<Item> *fields);
  void free_pushdown_handlers(List<TABLE_LIST>& join_list);
  void init_join_cache_and_keyread();
  bool prepare_sum_aggregators(THD *thd,Item_sum **func_ptr,
                               bool need_distinct);
 };
 enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS};