mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-30660 Aggregation functions fail to leverage uniqueness property
When executing a statement of the form SELECT AGGR_FN(DISTINCT c1, c2,..,cn) FROM t1, where AGGR_FN is an aggregate function such as COUNT(), AVG() or SUM(), and table t1 has a unique index whose columns are covered by some or all of the columns (c1, c2,..,cn), the retrieved argument values are inherently unique. Consequently, the de-duplication imposed by the DISTINCT clause is unnecessary and can be skipped, which makes the aggregation cheaper.

This optimization applies under the following conditions:
- only one table is involved in the join (not counting const tables)
- some arguments of the aggregate function are fields (not functions/subqueries)

This optimization also extends to queries of the form SELECT AGGR_FN(DISTINCT c1, c2,..,cn) FROM t1 GROUP BY cx,..,cy when a unique index is covered by some or all of the columns (c1, c2,..,cn, cx,..,cy): within each group the GROUP BY columns are fixed, so the remaining index columns among the arguments are already unique.
This commit is contained in:
@ -278,7 +278,6 @@ static void update_tmptable_sum_func(Item_sum **func,TABLE *tmp_table);
|
||||
static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end);
|
||||
static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
|
||||
static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr);
|
||||
static bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct);
|
||||
static bool init_sum_functions(Item_sum **func, Item_sum **end);
|
||||
static bool update_sum_func(Item_sum **func);
|
||||
static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
|
||||
@ -3656,7 +3655,7 @@ bool JOIN::make_aggr_tables_info()
|
||||
{
|
||||
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
|
||||
DBUG_RETURN(true);
|
||||
if (prepare_sum_aggregators(sum_funcs,
|
||||
if (prepare_sum_aggregators(thd, sum_funcs,
|
||||
!join_tab->is_using_agg_loose_index_scan()))
|
||||
DBUG_RETURN(true);
|
||||
group_list= NULL;
|
||||
@ -3766,7 +3765,7 @@ bool JOIN::make_aggr_tables_info()
|
||||
}
|
||||
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
|
||||
DBUG_RETURN(true);
|
||||
if (prepare_sum_aggregators(sum_funcs,
|
||||
if (prepare_sum_aggregators(thd, sum_funcs,
|
||||
!join_tab ||
|
||||
!join_tab-> is_using_agg_loose_index_scan()))
|
||||
DBUG_RETURN(true);
|
||||
@ -3947,8 +3946,8 @@ JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
|
||||
goto err;
|
||||
if (make_sum_func_list(all_fields, fields_list, true))
|
||||
goto err;
|
||||
if (prepare_sum_aggregators(sum_funcs,
|
||||
!(tables_list &&
|
||||
if (prepare_sum_aggregators(thd, sum_funcs,
|
||||
!(tables_list &&
|
||||
join_tab->is_using_agg_loose_index_scan())))
|
||||
goto err;
|
||||
if (setup_sum_funcs(thd, sum_funcs))
|
||||
@ -3957,7 +3956,7 @@ JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
|
||||
}
|
||||
else
|
||||
{
|
||||
if (prepare_sum_aggregators(sum_funcs,
|
||||
if (prepare_sum_aggregators(thd, sum_funcs,
|
||||
!join_tab->is_using_agg_loose_index_scan()))
|
||||
goto err;
|
||||
if (setup_sum_funcs(thd, sum_funcs))
|
||||
@ -26406,13 +26405,86 @@ static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr)
|
||||
}
|
||||
|
||||
|
||||
static bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct)
|
||||
/*
|
||||
@brief
|
||||
Setup aggregate functions.
|
||||
|
||||
@param thd Thread descriptor
|
||||
@param func_ptr Array of pointers to aggregate functions
|
||||
@param need_distinct FALSE means that the table access method already
|
||||
guarantees that arguments of all aggregate functions
|
||||
will be unique. (This is the case for Loose Scan)
|
||||
TRUE - Otherwise.
|
||||
@return
|
||||
false Ok
|
||||
true Error
|
||||
*/
|
||||
|
||||
bool JOIN::prepare_sum_aggregators(THD *thd, Item_sum **func_ptr,
|
||||
bool need_distinct)
|
||||
{
|
||||
Item_sum *func;
|
||||
DBUG_ENTER("prepare_sum_aggregators");
|
||||
while ((func= *(func_ptr++)))
|
||||
{
|
||||
if (func->set_aggregator(need_distinct && func->has_with_distinct() ?
|
||||
bool need_distinct_aggregator= need_distinct && func->has_with_distinct();
|
||||
if (need_distinct_aggregator && table_count - const_tables == 1)
|
||||
{
|
||||
/*
|
||||
We are doing setup for an aggregate with DISTINCT, like
|
||||
|
||||
SELECT agg_func(DISTINCT col1, col2 ...) FROM ...
|
||||
|
||||
In general case, agg_func will need to use Aggregator_distinct to
|
||||
remove duplicates from its arguments.
|
||||
We won't have to remove duplicates if we know the arguments are already
|
||||
unique. This is true when
|
||||
1. the join operation has only one non-const table (checked above)
|
||||
2. the argument list covers a PRIMARY or a UNIQUE index.
|
||||
|
||||
Example: here the values of t1.pk are unique:
|
||||
|
||||
SELECT agg_func(DISTINCT t1.pk, ...) FROM t1
|
||||
|
||||
and so the whole argument of agg_func is unique.
|
||||
*/
|
||||
List<Item> arg_fields;
|
||||
for (uint i= 0; i < func->argument_count(); i++)
|
||||
{
|
||||
if (func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM)
|
||||
arg_fields.push_back(func->arguments()[i]);
|
||||
}
|
||||
|
||||
/*
|
||||
If the query has a GROUP BY, then it's sufficient that a unique
|
||||
key is covered by a concatenation of {argument_list, group_by_list}.
|
||||
|
||||
Example: Suppose t1 has PRIMARY KEY(pk1, pk2). Then:
|
||||
|
||||
SELECT agg_func(DISTINCT t1.pk1, ...) FROM t1 GROUP BY t1.pk2
|
||||
|
||||
Each GROUP BY group will have t1.pk2 fixed. Then, the values of t1.pk1
|
||||
will be unique, and no de-duplication will be needed.
|
||||
*/
|
||||
for (ORDER *group= group_list; group ; group= group->next)
|
||||
{
|
||||
if ((*group->item)->real_item()->type() == Item::FIELD_ITEM)
|
||||
arg_fields.push_back(*group->item);
|
||||
}
|
||||
|
||||
if (list_contains_unique_index(join_tab[const_tables].table,
|
||||
find_field_in_item_list,
|
||||
(void *) &arg_fields))
|
||||
need_distinct_aggregator= false;
|
||||
}
|
||||
Json_writer_object trace_wrapper(thd);
|
||||
Json_writer_object trace_aggr(thd, "prepare_sum_aggregators");
|
||||
trace_aggr.add("function", func);
|
||||
trace_aggr.add("aggregator_type",
|
||||
(need_distinct_aggregator ||
|
||||
func->uses_non_standard_aggregator_for_distinct()) ?
|
||||
"distinct" : "simple");
|
||||
if (func->set_aggregator(need_distinct_aggregator ?
|
||||
Aggregator::DISTINCT_AGGREGATOR :
|
||||
Aggregator::SIMPLE_AGGREGATOR))
|
||||
DBUG_RETURN(TRUE);
|
||||
|
Reference in New Issue
Block a user