From d7b45e01b47b11daf21af9e68ad762f3a70b915c Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Sat, 19 Aug 2017 15:02:29 -0700 Subject: [PATCH] This patch complements the patch for mdev-10855. It allows to push conditions into derived with window functions not only in the cases when the window specifications of these window functions use the same partition, but also in the cases when the window functions use partitions that share only some fields. In these cases only the conditions over the common fields are pushed. --- mysql-test/r/derived_cond_pushdown.result | 236 ++++++++++++++++++++++ mysql-test/t/derived_cond_pushdown.test | 36 ++++ sql/sql_derived.cc | 10 +- sql/sql_lex.h | 1 + sql/sql_window.cc | 78 +++++++ 5 files changed, 356 insertions(+), 5 deletions(-) diff --git a/mysql-test/r/derived_cond_pushdown.result b/mysql-test/r/derived_cond_pushdown.result index a196709dad1..9c55c64ae32 100644 --- a/mysql-test/r/derived_cond_pushdown.result +++ b/mysql-test/r/derived_cond_pushdown.result @@ -9243,6 +9243,242 @@ EXPLAIN } } } +set statement optimizer_switch='condition_pushdown_for_derived=off' for select * from (select a, c, +sum(b) over (partition by a,c) as sum_b, +avg(b) over (partition by a,c) as avg_b +from t2 ) as t +where t.a > 2 and t.c in ('aa','bb','cc'); +a c sum_b avg_b +7 cc 28 14.0000 +7 cc 28 14.0000 +3 aa 92 46.0000 +7 bb 126 42.0000 +4 aa 15 15.0000 +7 bb 126 42.0000 +7 bb 126 42.0000 +3 bb 40 40.0000 +3 aa 92 46.0000 +select * from (select a, c, +sum(b) over (partition by a,c) as sum_b, +avg(b) over (partition by a,c) as avg_b +from t2 ) as t +where t.a > 2 and t.c in ('aa','bb','cc'); +a c sum_b avg_b +7 cc 28 14.0000 +7 cc 28 14.0000 +3 aa 92 46.0000 +7 bb 126 42.0000 +4 aa 15 15.0000 +7 bb 126 42.0000 +7 bb 126 42.0000 +3 bb 40 40.0000 +3 aa 92 46.0000 +explain select * from (select a, c, +sum(b) over (partition by a,c) as sum_b, +avg(b) over (partition by a,c) as avg_b +from t2 ) as t +where t.a > 2 and t.c in ('aa','bb','cc'); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY ALL NULL NULL NULL NULL 16 Using where +2 DERIVED t2 ALL idx NULL NULL NULL 20 Using where; Using temporary +explain format=json select * from (select a, c, +sum(b) over (partition by a,c) as sum_b, +avg(b) over (partition by a,c) as avg_b +from t2 ) as t +where t.a > 2 and t.c in ('aa','bb','cc'); +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "", + "access_type": "ALL", + "rows": 16, + "filtered": 100, + "attached_condition": "t.a > 2 and t.c in ('aa','bb','cc')", + "materialized": { + "query_block": { + "select_id": 2, + "window_functions_computation": { + "sorts": { + "filesort": { + "sort_key": "t2.a, t2.c" + } + }, + "temporary_table": { + "table": { + "table_name": "t2", + "access_type": "ALL", + "possible_keys": ["idx"], + "rows": 20, + "filtered": 80, + "attached_condition": "t2.a > 2 and t2.c in ('aa','bb','cc')" + } + } + } + } + } + } + } +} +set statement optimizer_switch='condition_pushdown_for_derived=off' for select * from (select a, c, +sum(b) over (partition by a,c) as sum_b, +avg(b) over (partition by a) as avg_b +from t2 ) as t +where t.a > 2 and t.c in ('aa','bb','cc'); +a c sum_b avg_b +7 cc 28 30.8000 +7 cc 28 30.8000 +3 aa 92 44.0000 +7 bb 126 30.8000 +4 aa 15 46.3333 +7 bb 126 30.8000 +7 bb 126 30.8000 +3 bb 40 44.0000 +3 aa 92 44.0000 +select * from (select a, c, +sum(b) over (partition by a,c) as sum_b, +avg(b) over (partition by a) as avg_b +from t2 ) as t +where t.a > 2 and t.c in ('aa','bb','cc'); +a c sum_b avg_b +7 cc 28 30.8000 +7 cc 28 30.8000 +3 aa 92 44.0000 +7 bb 126 30.8000 +4 aa 15 46.3333 +7 bb 126 30.8000 +7 bb 126 30.8000 +3 bb 40 44.0000 +3 aa 92 44.0000 +explain select * from (select a, c, +sum(b) over (partition by a,c) as sum_b, +avg(b) over (partition by a) as avg_b +from t2 ) as t +where t.a > 2 and t.c in ('aa','bb','cc'); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY ALL NULL NULL NULL NULL 16 Using where +2 DERIVED t2 ALL idx NULL NULL NULL 20 Using where; Using temporary +explain format=json select * from (select a, c, +sum(b) over (partition by a,c) as sum_b, +avg(b) over (partition by a) as avg_b +from t2 ) as t +where t.a > 2 and t.c in ('aa','bb','cc'); +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "", + "access_type": "ALL", + "rows": 16, + "filtered": 100, + "attached_condition": "t.a > 2 and t.c in ('aa','bb','cc')", + "materialized": { + "query_block": { + "select_id": 2, + "window_functions_computation": { + "sorts": { + "filesort": { + "sort_key": "t2.a, t2.c" + } + }, + "temporary_table": { + "table": { + "table_name": "t2", + "access_type": "ALL", + "possible_keys": ["idx"], + "rows": 20, + "filtered": 80, + "attached_condition": "t2.a > 2" + } + } + } + } + } + } + } +} +set statement optimizer_switch='condition_pushdown_for_derived=off' for select * from (select a, c, +sum(b) over (partition by a,c) as sum_b, +avg(b) over (partition by c) as avg_b +from t2 ) as t +where t.a > 2 and t.c in ('aa','bb','cc'); +a c sum_b avg_b +7 cc 28 14.0000 +7 cc 28 14.0000 +3 aa 92 35.4000 +7 bb 126 36.5000 +4 aa 15 35.4000 +7 bb 126 36.5000 +7 bb 126 36.5000 +3 bb 40 36.5000 +3 aa 92 35.4000 +select * from (select a, c, +sum(b) over (partition by a,c) as sum_b, +avg(b) over (partition by c) as avg_b +from t2 ) as t +where t.a > 2 and t.c in ('aa','bb','cc'); +a c sum_b avg_b +7 cc 28 14.0000 +7 cc 28 14.0000 +3 aa 92 35.4000 +7 bb 126 36.5000 +4 aa 15 35.4000 +7 bb 126 36.5000 +7 bb 126 36.5000 +3 bb 40 36.5000 +3 aa 92 35.4000 +explain select * from (select a, c, +sum(b) over (partition by a,c) as sum_b, +avg(b) over (partition by c) as avg_b +from t2 ) as t +where t.a > 2 and t.c in ('aa','bb','cc'); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY ALL NULL NULL NULL NULL 20 Using where +2 DERIVED t2 ALL NULL NULL NULL NULL 20 Using where; Using temporary +explain format=json select * from (select a, c, +sum(b) over (partition by a,c) as sum_b, +avg(b) over (partition by c) as avg_b +from t2 ) as t +where t.a > 2 and t.c in ('aa','bb','cc'); +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "", + "access_type": "ALL", + "rows": 20, + "filtered": 100, + "attached_condition": "t.a > 2 and t.c in ('aa','bb','cc')", + "materialized": { + "query_block": { + "select_id": 2, + "window_functions_computation": { + "sorts": { + "filesort": { + "sort_key": "t2.c" + }, + "filesort": { + "sort_key": "t2.a, t2.c" + } + }, + "temporary_table": { + "table": { + "table_name": "t2", + "access_type": "ALL", + "rows": 20, + "filtered": 100, + "attached_condition": "t2.c in ('aa','bb','cc')" + } + } + } + } + } + } + } +} drop table t1,t2; set optimizer_switch= @save_optimizer_switch; # diff --git a/mysql-test/t/derived_cond_pushdown.test b/mysql-test/t/derived_cond_pushdown.test index f82b23a9aa1..f7bdee51206 100644 --- a/mysql-test/t/derived_cond_pushdown.test +++ b/mysql-test/t/derived_cond_pushdown.test @@ -1617,6 +1617,42 @@ eval $q4; eval explain $q4; eval explain format=json $q4; +let $q5= +select * from (select a, c, + sum(b) over (partition by a,c) as sum_b, + avg(b) over (partition by a,c) as avg_b + from t2 ) as t + where t.a > 2 and t.c in ('aa','bb','cc'); + +eval $no_pushdown $q5; +eval $q5; +eval explain $q5; +eval explain format=json $q5; + +let $q6= +select * from (select a, c, + sum(b) over (partition by a,c) as sum_b, + avg(b) over (partition by a) as avg_b + from t2 ) as t + where t.a > 2 and t.c in ('aa','bb','cc'); + +eval $no_pushdown $q6; +eval $q6; +eval explain $q6; +eval explain format=json $q6; + +let $q7= +select * from (select a, c, + sum(b) over (partition by a,c) as sum_b, + avg(b) over (partition by c) as avg_b + from t2 ) as t + where t.a > 2 and t.c in ('aa','bb','cc'); + +eval $no_pushdown $q7; +eval $q7; +eval explain $q7; +eval explain format=json $q7; + drop table t1,t2; set optimizer_switch= @save_optimizer_switch; diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index ad18e1c4686..bf1a777f50c 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -1251,8 +1251,9 @@ bool pushdown_cond_for_derived(THD *thd, Item *cond, TABLE_LIST *derived) { if (sl->join->group_list || sl->join->implicit_grouping) continue; - if (!(sl->window_specs.elements == 1 && - sl->window_specs.head()->partition_list)) + ORDER *common_partition_fields= + sl->find_common_window_func_partition_fields(thd); + if (!common_partition_fields) continue; extracted_cond_copy= !sl->next_select() ? extracted_cond : @@ -1260,9 +1261,8 @@ bool pushdown_cond_for_derived(THD *thd, Item *cond, TABLE_LIST *derived) if (!extracted_cond_copy) continue; - Item *cond_over_partition_fields; - ORDER *grouping_list= sl->window_specs.head()->partition_list->first; - sl->collect_grouping_fields(thd, grouping_list); + Item *cond_over_partition_fields;; + sl->collect_grouping_fields(thd, common_partition_fields); sl->check_cond_extraction_for_grouping_fields(extracted_cond_copy, derived); cond_over_partition_fields= diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 79f2b111f20..e7bb084d134 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -1246,6 +1246,7 @@ public: } bool have_window_funcs() const { return (window_funcs.elements !=0); } + ORDER *find_common_window_func_partition_fields(THD *thd); bool cond_pushdown_is_allowed() const { return !olap && !explicit_limit; } diff --git a/sql/sql_window.cc b/sql/sql_window.cc index 74e1ad25183..7586bd78fe1 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -1,3 +1,4 @@ +#include "sql_parse.h" #include "sql_select.h" #include "sql_list.h" #include "item_windowfunc.h" @@ -307,6 +308,83 @@ setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, DBUG_RETURN(0); } + +/** + @brief + Find fields common for all partition lists used in window functions + + @param thd The thread handle + + @details + This function looks for the field references in the partition lists + of all window functions used in this select that are common for + all the partition lists. The function returns an ORDER list contained + all such references.The list either is specially built by the function + or is taken directly from the first window specification. + + @retval + pointer to the first element of the ORDER list contained field + references common for all partition lists + 0 if no such reference is found. +*/ + +ORDER *st_select_lex::find_common_window_func_partition_fields(THD *thd) +{ + ORDER *ord; + Item *item; + DBUG_ASSERT(window_funcs.elements); + List_iterator_fast it(window_funcs); + Item_window_func *wf= it++; + if (!wf->window_spec->partition_list) + return 0; + List common_fields; + uint first_partition_elements; + for (ord= wf->window_spec->partition_list->first; ord; ord= ord->next) + { + if ((*ord->item)->real_item()->type() == Item::FIELD_ITEM) + common_fields.push_back(*ord->item, thd->mem_root); + first_partition_elements++; + } + if (window_specs.elements == 1 && + common_fields.elements == first_partition_elements) + return wf->window_spec->partition_list->first; + List_iterator li(common_fields); + while (common_fields.elements && (wf= it++)) + { + if (!wf->window_spec->partition_list) + return 0; + while ((item= li++)) + { + for (ord= wf->window_spec->partition_list->first; ord; ord= ord->next) + { + if (item->eq(*ord->item, false)) + break; + } + if (!ord) + li.remove(); + } + li.rewind(); + } + if (!common_fields.elements) + return 0; + if (common_fields.elements == first_partition_elements) + return wf->window_spec->partition_list->first; + SQL_I_List res_list; + it.rewind(); + wf= it++; + for (ord= wf->window_spec->partition_list->first, item= li++; + ord; ord= ord->next) + { + if (item != *ord->item) + continue; + if (add_to_list(thd, res_list, item, ord->direction)) + return 0; + item= li++; + } + return res_list.first; +} + + ///////////////////////////////////////////////////////////////////////////// // Sorting window functions to minimize the number of table scans // performed during the computation of these functions