1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-30 16:24:05 +03:00

MDEV-30660 Aggregation functions fail to leverage uniqueness property

When executing a statement of the form
  SELECT AGGR_FN(DISTINCT c1, c2,..,cn) FROM t1,
where AGGR_FN is an aggregate function such as COUNT(), AVG() or SUM(),
and a unique index exists on table t1 covering some or all of the
columns (c1, c2,..,cn), the retrieved values are inherently unique.
Consequently, the need for de-duplication imposed by the DISTINCT
clause can be eliminated, leading to optimization of aggregation
operations.
This optimization applies under the following conditions:
  - only one table involved in the join (not counting const tables)
  - some arguments of the aggregate function are fields
        (not functions/subqueries)

This optimization extends to queries of the form
  SELECT AGGR_FN(c1, c2,..,cn) GROUP BY cx,..cy
when a unique index covers some or all of the columns
(c1, c2,..cn, cx,..cy)
This commit is contained in:
Oleg Smirnov
2024-01-18 16:18:08 +07:00
parent 0381921e26
commit 15623c7f29
6 changed files with 545 additions and 8 deletions

View File

@ -278,7 +278,6 @@ static void update_tmptable_sum_func(Item_sum **func,TABLE *tmp_table);
static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end);
static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr);
static bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct);
static bool init_sum_functions(Item_sum **func, Item_sum **end);
static bool update_sum_func(Item_sum **func);
static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
@ -3656,7 +3655,7 @@ bool JOIN::make_aggr_tables_info()
{
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
DBUG_RETURN(true);
if (prepare_sum_aggregators(sum_funcs,
if (prepare_sum_aggregators(thd, sum_funcs,
!join_tab->is_using_agg_loose_index_scan()))
DBUG_RETURN(true);
group_list= NULL;
@ -3766,7 +3765,7 @@ bool JOIN::make_aggr_tables_info()
}
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
DBUG_RETURN(true);
if (prepare_sum_aggregators(sum_funcs,
if (prepare_sum_aggregators(thd, sum_funcs,
!join_tab ||
!join_tab-> is_using_agg_loose_index_scan()))
DBUG_RETURN(true);
@ -3947,8 +3946,8 @@ JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
goto err;
if (make_sum_func_list(all_fields, fields_list, true))
goto err;
if (prepare_sum_aggregators(sum_funcs,
!(tables_list &&
if (prepare_sum_aggregators(thd, sum_funcs,
!(tables_list &&
join_tab->is_using_agg_loose_index_scan())))
goto err;
if (setup_sum_funcs(thd, sum_funcs))
@ -3957,7 +3956,7 @@ JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
}
else
{
if (prepare_sum_aggregators(sum_funcs,
if (prepare_sum_aggregators(thd, sum_funcs,
!join_tab->is_using_agg_loose_index_scan()))
goto err;
if (setup_sum_funcs(thd, sum_funcs))
@ -26406,13 +26405,86 @@ static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr)
}
static bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct)
/*
@brief
Setup aggregate functions.
@param thd Thread descriptor
@param func_ptr Array of pointers to aggregate functions
@param need_distinct FALSE means that the table access method already
guarantees that arguments of all aggregate functions
will be unique. (This is the case for Loose Scan)
TRUE - Otherwise.
@return
false Ok
true Error
*/
bool JOIN::prepare_sum_aggregators(THD *thd, Item_sum **func_ptr,
bool need_distinct)
{
Item_sum *func;
DBUG_ENTER("prepare_sum_aggregators");
while ((func= *(func_ptr++)))
{
if (func->set_aggregator(need_distinct && func->has_with_distinct() ?
bool need_distinct_aggregator= need_distinct && func->has_with_distinct();
if (need_distinct_aggregator && table_count - const_tables == 1)
{
/*
We are doing setup for an aggregate with DISTINCT, like
SELECT agg_func(DISTINCT col1, col2 ...) FROM ...
In general case, agg_func will need to use Aggregator_distinct to
remove duplicates from its arguments.
We won't have to remove duplicates if we know the arguments are already
unique. This is true when
1. the join operation has only one non-const table (checked above)
2. the argument list covers a PRIMARY or a UNIQUE index.
Example: here the values of t1.pk are unique:
SELECT agg_func(DISTINCT t1.pk, ...) FROM t1
and so the whole argument of agg_func is unique.
*/
List<Item> arg_fields;
for (uint i= 0; i < func->argument_count(); i++)
{
if (func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM)
arg_fields.push_back(func->arguments()[i]);
}
/*
If the query has a GROUP BY, then it's sufficient that a unique
key is covered by a concatenation of {argument_list, group_by_list}.
Example: Suppose t1 has PRIMARY KEY(pk1, pk2). Then:
SELECT agg_func(DISTINCT t1.pk1, ...) FROM t1 GROUP BY t1.pk2
Each GROUP BY group will have t1.pk2 fixed. Then, the values of t1.pk1
will be unique, and no de-duplication will be needed.
*/
for (ORDER *group= group_list; group ; group= group->next)
{
if ((*group->item)->real_item()->type() == Item::FIELD_ITEM)
arg_fields.push_back(*group->item);
}
if (list_contains_unique_index(join_tab[const_tables].table,
find_field_in_item_list,
(void *) &arg_fields))
need_distinct_aggregator= false;
}
Json_writer_object trace_wrapper(thd);
Json_writer_object trace_aggr(thd, "prepare_sum_aggregators");
trace_aggr.add("function", func);
trace_aggr.add("aggregator_type",
(need_distinct_aggregator ||
func->uses_non_standard_aggregator_for_distinct()) ?
"distinct" : "simple");
if (func->set_aggregator(need_distinct_aggregator ?
Aggregator::DISTINCT_AGGREGATOR :
Aggregator::SIMPLE_AGGREGATOR))
DBUG_RETURN(TRUE);