From 86377d078ef130d3da32c5da31131e519822e139 Mon Sep 17 00:00:00 2001 From: Monty Date: Sun, 5 Jul 2015 12:39:46 +0300 Subject: [PATCH] Fixes done while working on MDEV-4119: Fixed several optimizer issues relatied to GROUP BY: a) Refering to a SELECT column in HAVING sometimes calculated it twice, which caused problems with non determinstic functions b) Removing duplicate fields and constants from GROUP BY was done too late for "using index for group by" optimization to work c) EXPLAIN SELECT ... GROUP BY did wrongly show 'Using filesort' in some cases involving "Using index for group-by" a) was fixed by: - Changed last argument to Item::split_sum_func2() from bool to int to allow more flags - Added flag argument to Item::split_sum_func() to allow on to specify if the item was in the SELECT part - Mark all split_sum_func() calls from SELECT with SPLIT_SUM_SELECT - Changed split_sum_func2() to do nothing if called with an argument that is not a sum function and doesn't include sum functions, if we are not an argument to SELECT. This ensures that in a case like select a*sum(b) as f1 from t1 where a=1 group by c having f1 <= 10; That 'a' in the SELECT part is stored as a reference in the temporary table togeher with sum(b) while the 'a' in having isn't (not needed as 'a' is already a reference to a column in the result) b) was fixed by: - Added an extra remove_const() pass for GROUP BY arguments before make_join_statistics() in case of one table SELECT. This allowes get_best_group_min_max() to optimize things better. c) was fixed by: - Added test for group by optimization in JOIN::exec_inner for select->quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX item.cc: - Simplifed Item::split_sum_func2() - Split test to make them faster and easier to read - Changed last argument to Item::split_sum_func2() from bool to int to allow more flags - Added flag argument to Item::split_sum_func() to allow on to specify if the item was in the SELECT part - Changed split_sum_func2() to do nothing if called with an argument that is not a sum function and doesn't include sum functions, if we are not an argument to SELECT. opt_range.cc: - Simplified get_best_group_min_max() by calcuating first how many group_by elements. - Use join->group instead of join->group_list to test if group by, as join->group_list may be NULL if everything was optimized away. sql_select.cc: - Added an extra remove_const() pass for GROUP BY arguments before make_join_statistics() in case of one table SELECT. - Use group instead of group_list to test if group by, as group_list may be NULL if everything was optimized away. - Moved printing of "Error in remove_const" to remove_const() instead of having it in caller. - Simplified some if tests by re-ordering code. - update_depend_map_for_order() and remove_const() fixed to handle the case where make_join_statistics() has not yet been called (join->join_tab is 0 in this case) --- mysql-test/r/group_by.result | 88 ++++++++++++++++++- mysql-test/t/group_by.test | 30 ++++++- sql/item.cc | 121 ++++++++++++++++---------- sql/item.h | 8 +- sql/item_cmpfunc.cc | 5 +- sql/item_cmpfunc.h | 3 +- sql/item_func.cc | 5 +- sql/item_func.h | 3 +- sql/item_row.cc | 5 +- sql/item_row.h | 3 +- sql/opt_range.cc | 26 +++--- sql/sql_base.cc | 3 +- sql/sql_select.cc | 164 ++++++++++++++++++++++------------- sql/sql_select.h | 1 + 14 files changed, 329 insertions(+), 136 deletions(-) diff --git a/mysql-test/r/group_by.result b/mysql-test/r/group_by.result index 38abbfef261..483d4d1ca8e 100644 --- a/mysql-test/r/group_by.result +++ b/mysql-test/r/group_by.result @@ -522,6 +522,7 @@ NULL 9 3 b 1 drop table t1; +set big_tables=0; create table t1 (a int not null, b int not null); insert into t1 values (1,1),(1,2),(3,1),(3,2),(2,2),(2,1); create table t2 (a int not null, b int not null, key(a)); @@ -659,7 +660,10 @@ insert into t1 (a,b) values (1,2),(1,3),(2,5); select a, 0.1*0+1 r2, sum(1) r1 from t1 where a = 1 group by a having r1>1 and r2=1; a r2 r1 1 1.0 2 -select a, round(rand(100)*10) r2, sum(1) r1 from t1 where a = 1 group by a having r1>1 and r2<=2; +select a, round(rand(100)*10) r2, sum(1) r1 from t1 where a = 1 group by a having r1>1 and r2<=2; +a r2 r1 +1 2 2 +select a, round(rand(100)*10) r2, sum(1) r1 from t1 group by a having r1>1 and r2<=2; a r2 r1 1 2 2 select a,sum(b) from t1 where a=1 group by c; @@ -668,6 +672,12 @@ a sum(b) select a*sum(b) from t1 where a=1 group by c; a*sum(b) 5 +select a*sum(b) as f1 from t1 where a=1 group by c having f1 <= 10; +f1 +5 +select a,a*sum(b) as f1 from t1 where a=1 group by c having a*sum(b)+0 <= 10; +a f1 +1 5 select sum(a)*sum(b) from t1 where a=1 group by c; sum(a)*sum(b) 10 @@ -1988,12 +1998,12 @@ SHOW SESSION STATUS LIKE 'Sort_scan%'; Variable_name Value Sort_scan 0 EXPLAIN SELECT SQL_BIG_RESULT col1 AS field1, col1 AS field2 -FROM t1 GROUP BY field1, field2;; +FROM t1 GROUP BY field1, field2; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index NULL idx 5 NULL 20 Using index; Using filesort FLUSH STATUS; SELECT SQL_BIG_RESULT col1 AS field1, col1 AS field2 -FROM t1 GROUP BY field1, field2;; +FROM t1 GROUP BY field1, field2; field1 field2 1 1 2 2 @@ -2082,6 +2092,58 @@ f1 f2 19 19 20 20 explain +select col1 f1, col1 f2 from t1 group by f1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL idx 5 NULL 20 Using index +select col1 f1, col1 f2 from t1 group by f1; +f1 f2 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +explain +select col1 f1, col1 f2 from t1 group by f1, f2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL idx 5 NULL 20 Using index +select col1 f1, col1 f2 from t1 group by f1, f2; +f1 f2 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +explain select col1 f1, col1 f2 from t1 group by f2 order by f2, f1; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index NULL idx 5 NULL 20 Using index @@ -2141,6 +2203,22 @@ INSERT INTO t2(col1, col2) VALUES (1,20),(2,19),(3,18),(4,17),(5,16),(6,15),(7,14),(8,13),(9,12),(10,11), (11,10),(12,9),(13,8),(14,7),(15,6),(16,5),(17,4),(18,3),(19,2),(20,1); explain +select col1 f1, col2 f2, col1 f3 from t2 group by f1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range NULL idx 5 NULL 7 Using index for group-by +explain +select SQL_BIG_RESULT col1 f1, col2 f2, col1 f3 from t2 group by f1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range NULL idx 5 NULL 7 Using index for group-by +explain +select col1 f1, col2 f2, col1 f3 from t2 group by f1, f2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index NULL idx 10 NULL 20 Using index +explain +select col1 f1, col1 f2 from t2 group by f1, 1+1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range NULL idx 5 NULL 7 Using index for group-by +explain select col1 f1, col2 f2, col1 f3 from t2 group by f1, f2, f3+0; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 index NULL idx 10 NULL 20 Using index; Using temporary; Using filesort @@ -2167,6 +2245,10 @@ f1 f2 f3 19 2 19 20 1 20 explain +select col1 f1, col2 f2, col1 f3 from t2 order by f1,f2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index NULL idx 10 NULL 20 Using index +explain select col1 f1, col2 f2, col1 f3 from t2 order by f1, f2, f3+0; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 index NULL idx 10 NULL 20 Using index; Using filesort diff --git a/mysql-test/t/group_by.test b/mysql-test/t/group_by.test index 4b992faa306..531cec6b730 100644 --- a/mysql-test/t/group_by.test +++ b/mysql-test/t/group_by.test @@ -394,6 +394,7 @@ select a,count(*) from t1 group by a; set big_tables=1; select a,count(*) from t1 group by a; drop table t1; +set big_tables=0; # # Test of GROUP BY ... ORDER BY NULL optimization @@ -485,9 +486,12 @@ create table t1 (a integer, b integer, c integer); insert into t1 (a,b) values (1,2),(1,3),(2,5); select a, 0.1*0+1 r2, sum(1) r1 from t1 where a = 1 group by a having r1>1 and r2=1; # rand(100)*10 will be < 2 only for the first row (of 6) -select a, round(rand(100)*10) r2, sum(1) r1 from t1 where a = 1 group by a having r1>1 and r2<=2; +select a, round(rand(100)*10) r2, sum(1) r1 from t1 where a = 1 group by a having r1>1 and r2<=2; +select a, round(rand(100)*10) r2, sum(1) r1 from t1 group by a having r1>1 and r2<=2; select a,sum(b) from t1 where a=1 group by c; select a*sum(b) from t1 where a=1 group by c; +select a*sum(b) as f1 from t1 where a=1 group by c having f1 <= 10; +select a,a*sum(b) as f1 from t1 where a=1 group by c having a*sum(b)+0 <= 10; select sum(a)*sum(b) from t1 where a=1 group by c; select a,sum(b) from t1 where a=1 group by c having a=1; select a as d,sum(b) from t1 where a=1 group by c having d=1; @@ -1346,9 +1350,9 @@ let $query=SELECT SQL_BIG_RESULT col1 AS field1, col1 AS field2 FROM t1 GROUP BY field1, field2; # Needs to be range to exercise bug ---eval EXPLAIN $query; +--eval EXPLAIN $query FLUSH STATUS; ---eval $query; +--eval $query SHOW SESSION STATUS LIKE 'Sort_scan%'; CREATE VIEW v1 AS SELECT * FROM t1; @@ -1368,6 +1372,14 @@ explain select col1 f1, col1 f2 from t1 order by f2, f1+0; select col1 f1, col1 f2 from t1 order by f2, f1+0; +explain +select col1 f1, col1 f2 from t1 group by f1; +select col1 f1, col1 f2 from t1 group by f1; + +explain +select col1 f1, col1 f2 from t1 group by f1, f2; +select col1 f1, col1 f2 from t1 group by f1, f2; + explain select col1 f1, col1 f2 from t1 group by f2 order by f2, f1; select col1 f1, col1 f2 from t1 group by f2 order by f2, f1; @@ -1376,6 +1388,7 @@ explain select col1 f1, col1 f2 from t1 group by f1, f2 order by f2, f1; select col1 f1, col1 f2 from t1 group by f1, f2 order by f2, f1; + CREATE TABLE t2( col1 int, col2 int, @@ -1385,10 +1398,21 @@ INSERT INTO t2(col1, col2) VALUES (1,20),(2,19),(3,18),(4,17),(5,16),(6,15),(7,14),(8,13),(9,12),(10,11), (11,10),(12,9),(13,8),(14,7),(15,6),(16,5),(17,4),(18,3),(19,2),(20,1); +explain +select col1 f1, col2 f2, col1 f3 from t2 group by f1; +explain +select SQL_BIG_RESULT col1 f1, col2 f2, col1 f3 from t2 group by f1; +explain +select col1 f1, col2 f2, col1 f3 from t2 group by f1, f2; +explain +select col1 f1, col1 f2 from t2 group by f1, 1+1; + explain select col1 f1, col2 f2, col1 f3 from t2 group by f1, f2, f3+0; select col1 f1, col2 f2, col1 f3 from t2 group by f1, f2, f3+0; +explain +select col1 f1, col2 f2, col1 f3 from t2 order by f1,f2; explain select col1 f1, col2 f2, col1 f3 from t2 order by f1, f2, f3+0; select col1 f1, col2 f2, col1 f3 from t2 order by f1, f2, f3+0; diff --git a/sql/item.cc b/sql/item.cc index b4de9732b59..3b1f8b77e82 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1705,69 +1705,98 @@ public: @param thd Thread handler @param ref_pointer_array Pointer to array of reference fields - @param fields All fields in select + @param fields All fields in select @param ref Pointer to item - @param skip_registered <=> function be must skipped for registered - SUM items + @param split_flags Zero or more of the following flags + SPLIT_FUNC_SKIP_REGISTERED: + Function be must skipped for registered SUM + SUM items + SPLIT_FUNC_SELECT + We are called on the select level and have to + register items operated on sum function @note - This is from split_sum_func2() for items that should be split - All found SUM items are added FIRST in the fields list and we replace the item with a reference. + If this is an item in the SELECT list then we also have to split out + all arguments to functions used together with the sum function. + For example in case of SELECT A*sum(B) we have to split out both + A and sum(B). + This is not needed for ORDER BY, GROUP BY or HAVING as all references + to items in the select list are already of type REF + thd->fatal_error() may be called if we are out of memory */ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array, List &fields, Item **ref, - bool skip_registered) + uint split_flags) { - /* An item of type Item_sum is registered <=> ref_by != 0 */ - if (type() == SUM_FUNC_ITEM && skip_registered && - ((Item_sum *) this)->ref_by) - return; - if ((type() != SUM_FUNC_ITEM && with_sum_func) || - (type() == FUNC_ITEM && - (((Item_func *) this)->functype() == Item_func::ISNOTNULLTEST_FUNC || - ((Item_func *) this)->functype() == Item_func::TRIG_COND_FUNC))) + if (unlikely(type() == SUM_FUNC_ITEM)) { - /* Will split complicated items and ignore simple ones */ - split_sum_func(thd, ref_pointer_array, fields); + /* An item of type Item_sum is registered if ref_by != 0 */ + if ((split_flags & SPLIT_SUM_SKIP_REGISTERED) && + ((Item_sum *) this)->ref_by) + return; } - else if ((type() == SUM_FUNC_ITEM || (used_tables() & ~PARAM_TABLE_BIT)) && - type() != SUBSELECT_ITEM && - (type() != REF_ITEM || - ((Item_ref*)this)->ref_type() == Item_ref::VIEW_REF)) + else { - /* - Replace item with a reference so that we can easily calculate - it (in case of sum functions) or copy it (in case of fields) + /* Not a SUM() function */ + if (unlikely((!with_sum_func && !(split_flags & SPLIT_SUM_SELECT)))) + { + /* + This is not a SUM function and there are no SUM functions inside. + Nothing more to do. + */ + return; + } + if (likely(with_sum_func || + (type() == FUNC_ITEM && + (((Item_func *) this)->functype() == + Item_func::ISNOTNULLTEST_FUNC || + ((Item_func *) this)->functype() == + Item_func::TRIG_COND_FUNC)))) + { + /* Will call split_sum_func2() for all items */ + split_sum_func(thd, ref_pointer_array, fields, split_flags); + return; + } - The test above is to ensure we don't do a reference for things - that are constants (PARAM_TABLE_BIT is in effect a constant) - or already referenced (for example an item in HAVING) - Exception is Item_direct_view_ref which we need to convert to - Item_ref to allow fields from view being stored in tmp table. - */ - Item_aggregate_ref *item_ref; - uint el= fields.elements; - /* - If this is an item_ref, get the original item - This is a safety measure if this is called for things that is - already a reference. - */ - Item *real_itm= real_item(); - - ref_pointer_array[el]= real_itm; - if (!(item_ref= new Item_aggregate_ref(&thd->lex->current_select->context, - ref_pointer_array + el, 0, name))) - return; // fatal_error is set - if (type() == SUM_FUNC_ITEM) - item_ref->depended_from= ((Item_sum *) this)->depended_from(); - fields.push_front(real_itm); - thd->change_item_tree(ref, item_ref); + if (unlikely((!(used_tables() & ~PARAM_TABLE_BIT) || + type() == SUBSELECT_ITEM || + (type() == REF_ITEM && + ((Item_ref*)this)->ref_type() != Item_ref::VIEW_REF)))) + return; } + + /* + Replace item with a reference so that we can easily calculate + it (in case of sum functions) or copy it (in case of fields) + + The test above is to ensure we don't do a reference for things + that are constants (PARAM_TABLE_BIT is in effect a constant) + or already referenced (for example an item in HAVING) + Exception is Item_direct_view_ref which we need to convert to + Item_ref to allow fields from view being stored in tmp table. + */ + Item_aggregate_ref *item_ref; + uint el= fields.elements; + /* + If this is an item_ref, get the original item + This is a safety measure if this is called for things that is + already a reference. + */ + Item *real_itm= real_item(); + + ref_pointer_array[el]= real_itm; + if (!(item_ref= new Item_aggregate_ref(&thd->lex->current_select->context, + ref_pointer_array + el, 0, name))) + return; // fatal_error is set + if (type() == SUM_FUNC_ITEM) + item_ref->depended_from= ((Item_sum *) this)->depended_from(); + fields.push_front(real_itm); + thd->change_item_tree(ref, item_ref); } diff --git a/sql/item.h b/sql/item.h index 825435908c3..44649bf293a 100644 --- a/sql/item.h +++ b/sql/item.h @@ -74,6 +74,10 @@ char_to_byte_length_safe(uint32 char_length_arg, uint32 mbmaxlen_arg) } +/* Bits for the split_sum_func() function */ +#define SPLIT_SUM_SKIP_REGISTERED 1 /* Skip registered funcs */ +#define SPLIT_SUM_SELECT 2 /* SELECT item; Split all parts */ + /* "Declared Type Collation" A combination of collation and its derivation. @@ -1169,10 +1173,10 @@ public: return false; } virtual void split_sum_func(THD *thd, Item **ref_pointer_array, - List &fields) {} + List &fields, uint flags) {} /* Called for items that really have to be split */ void split_sum_func2(THD *thd, Item **ref_pointer_array, List &fields, - Item **ref, bool skip_registered); + Item **ref, uint flags); virtual bool get_date(MYSQL_TIME *ltime, ulonglong fuzzydate); bool get_time(MYSQL_TIME *ltime) { return get_date(ltime, TIME_TIME_ONLY | TIME_INVALID_DATES); } diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 7b34e64436e..caa645d0794 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -4575,12 +4575,13 @@ void Item_cond::traverse_cond(Cond_traverser traverser, */ void Item_cond::split_sum_func(THD *thd, Item **ref_pointer_array, - List &fields) + List &fields, uint flags) { List_iterator li(list); Item *item; while ((item= li++)) - item->split_sum_func2(thd, ref_pointer_array, fields, li.ref(), TRUE); + item->split_sum_func2(thd, ref_pointer_array, fields, li.ref(), + flags | SPLIT_SUM_SKIP_REGISTERED); } diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 7d89cf58afd..ceab59737cf 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -1759,7 +1759,8 @@ public: SARGABLE_PARAM **sargables); SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr); virtual void print(String *str, enum_query_type query_type); - void split_sum_func(THD *thd, Item **ref_pointer_array, List &fields); + void split_sum_func(THD *thd, Item **ref_pointer_array, List &fields, + uint flags); friend int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves, COND **conds); void top_level_item() { abort_on_null=1; } diff --git a/sql/item_func.cc b/sql/item_func.cc index 3e78be8df24..684724b1a9b 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -419,11 +419,12 @@ Item *Item_func::compile(Item_analyzer analyzer, uchar **arg_p, */ void Item_func::split_sum_func(THD *thd, Item **ref_pointer_array, - List &fields) + List &fields, uint flags) { Item **arg, **arg_end; for (arg= args, arg_end= args+arg_count; arg != arg_end ; arg++) - (*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg, TRUE); + (*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg, + flags | SPLIT_SUM_SKIP_REGISTERED); } diff --git a/sql/item_func.h b/sql/item_func.h index 97138cf52a5..7fbe65998ee 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -143,7 +143,8 @@ public: sync_with_sum_func_and_with_field(list); list.empty(); // Fields are used } - void split_sum_func(THD *thd, Item **ref_pointer_array, List &fields); + void split_sum_func(THD *thd, Item **ref_pointer_array, List &fields, + uint flags); virtual void print(String *str, enum_query_type query_type); void print_op(String *str, enum_query_type query_type); void print_args(String *str, uint from, enum_query_type query_type); diff --git a/sql/item_row.cc b/sql/item_row.cc index dea26ebc6ae..8b32f56973a 100644 --- a/sql/item_row.cc +++ b/sql/item_row.cc @@ -101,11 +101,12 @@ void Item_row::cleanup() void Item_row::split_sum_func(THD *thd, Item **ref_pointer_array, - List &fields) + List &fields, uint flags) { Item **arg, **arg_end; for (arg= args, arg_end= args + arg_count; arg != arg_end ; arg++) - (*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg, TRUE); + (*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg, + flags | SPLIT_SUM_SKIP_REGISTERED); } diff --git a/sql/item_row.h b/sql/item_row.h index 1bec6212715..c5cbf0aff8b 100644 --- a/sql/item_row.h +++ b/sql/item_row.h @@ -75,7 +75,8 @@ public: bool fix_fields(THD *thd, Item **ref); void fix_after_pullout(st_select_lex *new_parent, Item **ref); void cleanup(); - void split_sum_func(THD *thd, Item **ref_pointer_array, List &fields); + void split_sum_func(THD *thd, Item **ref_pointer_array, List &fields, + uint flags); table_map used_tables() const { return used_tables_cache; }; bool const_item() const { return const_item_cache; }; enum Item_result result_type() const { return ROW_RESULT; } diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 261b95d2948..09680e87e45 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -12811,6 +12811,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time) uint key_infix_len= 0; /* Length of key_infix. */ TRP_GROUP_MIN_MAX *read_plan= NULL; /* The eventually constructed TRP. */ uint key_part_nr; + uint elements_in_group; ORDER *tmp_group; Item *item; Item_field *item_field; @@ -12892,10 +12893,12 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time) } /* Check (GA4) - that there are no expressions among the group attributes. */ + elements_in_group= 0; for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next) { if ((*tmp_group->item)->real_item()->type() != Item::FIELD_ITEM) DBUG_RETURN(NULL); + elements_in_group++; } /* @@ -12944,8 +12947,16 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time) there are cases Loose Scan over a multi-part index is useful). */ if (!table->covering_keys.is_set(cur_index)) - goto next_index; - + continue; + + /* + This function is called on the precondition that the index is covering. + Therefore if the GROUP BY list contains more elements than the index, + these are duplicates. The GROUP BY list cannot be a prefix of the index. + */ + if (elements_in_group > table->actual_n_key_parts(cur_index_info)) + continue; + /* Unless extended keys can be used for cur_index: If the current storage manager is such that it appends the primary key to @@ -13006,13 +13017,6 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time) else goto next_index; } - /* - This function is called on the precondition that the index is covering. - Therefore if the GROUP BY list contains more elements than the index, - these are duplicates. The GROUP BY list cannot be a prefix of the index. - */ - if (cur_part == end_part && tmp_group) - goto next_index; } /* Check (GA2) if this is a DISTINCT query. @@ -13022,8 +13026,8 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time) Later group_fields_array of ORDER objects is used to convert the query to a GROUP query. */ - if ((!join->group_list && join->select_distinct) || - is_agg_distinct) + if ((!join->group && join->select_distinct) || + is_agg_distinct) { if (!is_agg_distinct) { diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 280666d9fae..cadf2a98cce 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -7935,7 +7935,8 @@ bool setup_fields(THD *thd, Item **ref_pointer_array, *(ref++)= item; if (item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM && sum_func_list) - item->split_sum_func(thd, ref_pointer_array, *sum_func_list); + item->split_sum_func(thd, ref_pointer_array, *sum_func_list, + SPLIT_SUM_SELECT); thd->lex->used_tables|= item->used_tables(); thd->lex->current_select->cur_pos_in_select_list++; } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 4a500032f11..da994538b6b 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -854,7 +854,7 @@ JOIN::prepare(Item ***rref_pointer_array, real_order= TRUE; if (item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM) - item->split_sum_func(thd, ref_pointer_array, all_fields); + item->split_sum_func(thd, ref_pointer_array, all_fields, 0); } if (!real_order) order= NULL; @@ -862,7 +862,7 @@ JOIN::prepare(Item ***rref_pointer_array, if (having && having->with_sum_func) having->split_sum_func2(thd, ref_pointer_array, all_fields, - &having, TRUE); + &having, SPLIT_SUM_SKIP_REGISTERED); if (select_lex->inner_sum_func_list) { Item_sum *end=select_lex->inner_sum_func_list; @@ -871,7 +871,7 @@ JOIN::prepare(Item ***rref_pointer_array, { item_sum= item_sum->next; item_sum->split_sum_func2(thd, ref_pointer_array, - all_fields, item_sum->ref_by, FALSE); + all_fields, item_sum->ref_by, 0); } while (item_sum != end); } @@ -1334,6 +1334,24 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S /* get_sort_by_table() call used to be here: */ MEM_UNDEFINED(&sort_by_table, sizeof(sort_by_table)); + /* + We have to remove constants and duplicates from group_list before + calling make_join_statistics() as this may call get_best_group_min_max() + which needs a simplfied group_list. + */ + simple_group= 1; + if (group_list && table_count == 1) + { + group_list= remove_const(this, group_list, conds, + rollup.state == ROLLUP::STATE_NONE, + &simple_group); + if (thd->is_error()) + { + error= 1; + DBUG_RETURN(1); + } + } + /* Calculate how to do the join */ THD_STAGE_INFO(thd, stage_statistics); if (make_join_statistics(this, select_lex->leaf_tables, &keyuse) || @@ -1549,7 +1567,6 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S if (thd->is_error()) { error= 1; - DBUG_PRINT("error",("Error from remove_const")); DBUG_RETURN(1); } @@ -1572,14 +1589,14 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S The FROM clause must contain a single non-constant table. */ - if (table_count - const_tables == 1 && (group_list || select_distinct) && + if (table_count - const_tables == 1 && (group || select_distinct) && !tmp_table_param.sum_func_count && (!join_tab[const_tables].select || !join_tab[const_tables].select->quick || join_tab[const_tables].select->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)) { - if (group_list && rollup.state == ROLLUP::STATE_NONE && + if (group && rollup.state == ROLLUP::STATE_NONE && list_contains_unique_index(join_tab[const_tables].table, find_field_in_order_list, (void *) group_list)) @@ -1625,7 +1642,7 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S select_distinct= 0; } } - if (group_list || tmp_table_param.sum_func_count) + if (group || tmp_table_param.sum_func_count) { if (! hidden_group_fields && rollup.state == ROLLUP::STATE_NONE) select_distinct=0; @@ -1690,32 +1707,29 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S else if (thd->is_fatal_error) // End of memory DBUG_RETURN(1); } - simple_group= 0; + if (group) { - ORDER *old_group_list; - group_list= remove_const(this, (old_group_list= group_list), conds, + /* + Update simple_group and group_list as we now have more information, like + which tables or columns are constant. + */ + group_list= remove_const(this, group_list, conds, rollup.state == ROLLUP::STATE_NONE, - &simple_group); + &simple_group); if (thd->is_error()) { error= 1; - DBUG_PRINT("error",("Error from remove_const")); DBUG_RETURN(1); } - if (old_group_list && !group_list) + if (!group_list) { - DBUG_ASSERT(group); + /* The output has only one row */ + order=0; + simple_order=1; select_distinct= 0; + group_optimized_away= 1; } } - if (!group_list && group) - { - order=0; // The output has only one row - simple_order=1; - select_distinct= 0; // No need in distinct for 1 row - group_optimized_away= 1; - } - calc_group_buffer(this, group_list); send_group_parts= tmp_table_param.group_parts; /* Save org parts */ if (procedure && procedure->group) @@ -1725,7 +1739,6 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S if (thd->is_error()) { error= 1; - DBUG_PRINT("error",("Error from remove_const")); DBUG_RETURN(1); } calc_group_buffer(this, group_list); @@ -1876,14 +1889,6 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S if ((select_lex->options & OPTION_SCHEMA_TABLE)) optimize_schema_tables_reads(this); - tmp_having= having; - if (select_options & SELECT_DESCRIBE) - { - error= 0; - goto derived_exit; - } - having= 0; - /* The loose index scan access method guarantees that all grouping or duplicate row elimination (for distinct) is already performed @@ -1907,6 +1912,11 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S error= 0; + tmp_having= having; + if (select_options & SELECT_DESCRIBE) + goto derived_exit; + having= 0; + DBUG_RETURN(0); setup_subq_exit: @@ -1944,8 +1954,9 @@ int JOIN::init_execution() /* Enable LIMIT ROWS EXAMINED during query execution if: (1) This JOIN is the outermost query (not a subquery or derived table) - This ensures that the limit is enabled when actual execution begins, and - not if a subquery is evaluated during optimization of the outer query. + This ensures that the limit is enabled when actual execution begins, + and not if a subquery is evaluated during optimization of the outer + query. (2) This JOIN is not the result of a UNION. In this case do not apply the limit in order to produce the partial query result stored in the UNION temp table. @@ -2561,19 +2572,26 @@ void JOIN::exec_inner() simple_order= simple_group; skip_sort_order= 0; } - bool made_call= false; - if (order && - (order != group_list || !(select_options & SELECT_BIG_RESULT)) && - (const_tables == table_count || - ((simple_order || skip_sort_order) && - (made_call=true) && - test_if_skip_sort_order(&join_tab[const_tables], order, - select_limit, 0, - &join_tab[const_tables].table-> + if (order && join_tab) + { + bool made_call= false; + SQL_SELECT *select= join_tab[const_tables].select; + if ((order != group_list || + !(select_options & SELECT_BIG_RESULT) || + (select && select->quick && + select->quick->get_type() == + QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)) && + (const_tables == table_count || + ((simple_order || skip_sort_order) && + (made_call=true) && + test_if_skip_sort_order(&join_tab[const_tables], order, + select_limit, 0, + &join_tab[const_tables].table-> keys_in_use_for_query)))) - order=0; - if (made_call) - join_tab[const_tables].update_explain_data(const_tables); + order=0; + if (made_call) + join_tab[const_tables].update_explain_data(const_tables); + } having= tmp_having; select_describe(this, need_tmp, order != 0 && !skip_sort_order, @@ -12022,7 +12040,8 @@ static void update_depend_map_for_order(JOIN *join, ORDER *order) order->used= 0; // Not item_sum(), RAND() and no reference to table outside of sub select if (!(order->depend_map & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) - && !order->item[0]->with_sum_func) + && !order->item[0]->with_sum_func && + join->join_tab) { for (JOIN_TAB **tab=join->map2table; depend_map ; @@ -12050,7 +12069,8 @@ static void update_depend_map_for_order(JOIN *join, ORDER *order) @param cond WHERE statement @param change_list Set to 1 if we should remove things from list. If this is not set, then only simple_order is - calculated. + calculated. This is not set when we + are using ROLLUP @param simple_order Set to 1 if we are only using simple expressions. @@ -12062,6 +12082,7 @@ static ORDER * remove_const(JOIN *join,ORDER *first_order, COND *cond, bool change_list, bool *simple_order) { + *simple_order= 1; if (join->table_count == join->const_tables) return change_list ? 0 : first_order; // No need to sort @@ -12072,23 +12093,37 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, bool first_is_base_table= FALSE; DBUG_ENTER("remove_const"); - if (join->join_tab[join->const_tables].table) - { - first_table= join->join_tab[join->const_tables].table->map; - first_is_base_table= TRUE; - } - /* - Cleanup to avoid interference of calls of this function for - ORDER BY and GROUP BY + Join tab is set after make_join_statistics() has been called. + In case of one table with GROUP BY this function is called before + join_tab is set for the GROUP_BY expression */ - for (JOIN_TAB *tab= join->join_tab + join->const_tables; - tab < join->join_tab + join->table_count; - tab++) - tab->cached_eq_ref_table= FALSE; + if (join->join_tab) + { + if (join->join_tab[join->const_tables].table) + { + first_table= join->join_tab[join->const_tables].table->map; + first_is_base_table= TRUE; + } + + /* + Cleanup to avoid interference of calls of this function for + ORDER BY and GROUP BY + */ + for (JOIN_TAB *tab= join->join_tab + join->const_tables; + tab < join->join_tab + join->table_count; + tab++) + tab->cached_eq_ref_table= FALSE; + + *simple_order= *join->join_tab[join->const_tables].on_expr_ref ? 0 : 1; + } + else + { + first_is_base_table= FALSE; + first_table= 0; // Not used, for gcc + } prev_ptr= &first_order; - *simple_order= *join->join_tab[join->const_tables].on_expr_ref ? 0 : 1; /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */ @@ -12134,7 +12169,8 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, DBUG_PRINT("info",("removing: %s", order->item[0]->full_name())); continue; } - if (first_is_base_table && (ref=order_tables & (not_const_tables ^ first_table))) + if (first_is_base_table && + (ref=order_tables & (not_const_tables ^ first_table))) { if (!(order_tables & first_table) && only_eq_ref_tables(join,first_order, ref)) @@ -12165,6 +12201,10 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, *prev_ptr=0; if (prev_ptr == &first_order) // Nothing to sort/group *simple_order=1; +#ifndef DBUG_OFF + if (join->thd->is_error()) + DBUG_PRINT("error",("Error from remove_const")); +#endif DBUG_PRINT("exit",("simple_order: %d",(int) *simple_order)); DBUG_RETURN(first_order); } @@ -20985,6 +21025,8 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order, and thus force sorting on disk unless a group min-max optimization is going to be used as it is applied now only for one table queries with covering indexes. + The expections is if we are already using the index for GROUP BY + (in which case sort would be free) or ORDER and GROUP BY are different. */ if ((order != join->group_list || !(join->select_options & SELECT_BIG_RESULT) || diff --git a/sql/sql_select.h b/sql/sql_select.h index 9cb7e0e61b1..9528442ecf2 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1327,6 +1327,7 @@ public: table_count= 0; top_join_tab_count= 0; const_tables= 0; + const_table_map= 0; eliminated_tables= 0; join_list= 0; implicit_grouping= FALSE;