From 8ec5e13f1f0d56afe42e5ded02baeab7a6a60261 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 30 Sep 2010 18:32:44 +0300 Subject: [PATCH] MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation Phase 3: Implementation of re-optimization of subqueries with injected predicates and cost comparison between Materialization and IN->EXISTS strategies. The commit contains the following known problems: - The implementation of EXPLAIN has not been re-engineered to reflect the changes in subquery optimization. EXPLAIN for subqueries is called during the execute phase, which results in different code paths during JOIN::optimize and thus in differing EXPLAIN messages for constant/system tables. - There are some valgrind warnings that need investigation - Several EXPLAINs with minor differences need to be reconsidered after fixing the EXPLAIN problem above. This patch also adds one extra optimizer_switch: 'in_to_exists' for complete manual control of the subquery execution strategies. --- mysql-test/r/subselect3.result | 7 +- mysql-test/r/subselect3_jcl6.result | 11 +- mysql-test/t/subselect3.test | 3 +- sql/item_subselect.cc | 300 +++++++------------------ sql/item_subselect.h | 45 ++-- sql/mysql_priv.h | 18 +- sql/mysqld.cc | 8 +- sql/opt_subselect.cc | 328 +++++++++++++++++++--------- sql/share/errmsg.txt | 2 + sql/sql_lex.cc | 18 +- sql/sql_select.cc | 299 ++++++++++++++++++------- sql/sql_select.h | 15 +- 12 files changed, 600 insertions(+), 454 deletions(-) diff --git a/mysql-test/r/subselect3.result b/mysql-test/r/subselect3.result index cfc18413ee0..39ae0bbb4de 100644 --- a/mysql-test/r/subselect3.result +++ b/mysql-test/r/subselect3.result @@ -840,7 +840,12 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22) 1 0 0 2 0 0 11 0 0 -# 2nd and 3rd columns should be same for x == 11 only +# 2nd and 3rd columns should be same +EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 3 +3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used +2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1; x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12) 1 0 0 diff --git a/mysql-test/r/subselect3_jcl6.result b/mysql-test/r/subselect3_jcl6.result index d25ca436311..0a969839ed5 100644 --- a/mysql-test/r/subselect3_jcl6.result +++ b/mysql-test/r/subselect3_jcl6.result @@ -844,11 +844,16 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22) 1 0 0 2 0 0 11 0 0 -# 2nd and 3rd columns should be same for x == 11 only +# 2nd and 3rd columns should be same +EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 3 +3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used +2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1; x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12) -1 0 1 -2 0 1 +1 0 0 +2 0 0 11 1 1 DROP TABLE t1; # both columns should be same diff --git a/mysql-test/t/subselect3.test b/mysql-test/t/subselect3.test index a621c4c776c..3f63149ce96 100644 --- a/mysql-test/t/subselect3.test +++ b/mysql-test/t/subselect3.test @@ -681,7 +681,8 @@ SELECT a, ROW(11, 12) = (SELECT a, 12), ROW(11, 12) IN (SELECT a, 12) FROM t1; # The x alias is used below to workaround bug #40674. # Regression tests for sum function on outer column in subselect from dual: SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 22), ROW(11, 12) IN (SELECT MAX(x), 22) FROM t1; ---echo # 2nd and 3rd columns should be same for x == 11 only +--echo # 2nd and 3rd columns should be same +EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1; SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1; DROP TABLE t1; diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 38d1526f917..0a91e4a2a77 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -62,7 +62,6 @@ void Item_subselect::init(st_select_lex *select_lex, DBUG_PRINT("enter", ("select_lex: 0x%lx", (long) select_lex)); unit= select_lex->master_unit(); thd= unit->thd; - is_min_max_optimized= FALSE; if (unit->item) { /* @@ -160,8 +159,8 @@ void Item_in_subselect::cleanup() } first_execution= TRUE; is_constant= FALSE; - if (exec_method == MATERIALIZATION) - exec_method= NOT_TRANSFORMED; + if (in_strategy & SUBS_MATERIALIZATION) + in_strategy= 0; pushed_cond_guards= NULL; Item_subselect::cleanup(); DBUG_VOID_RETURN; @@ -618,7 +617,7 @@ bool Item_in_subselect::exec() - on a cost-based basis, that takes into account the cost of a cache lookup, the cache hit rate, and the savings per cache hit. */ - if (!left_expr_cache && exec_method == MATERIALIZATION) + if (!left_expr_cache && (in_strategy & SUBS_MATERIALIZATION)) init_left_expr_cache(); /* @@ -1072,7 +1071,7 @@ Item_in_subselect::Item_in_subselect(Item * left_exp, st_select_lex *select_lex): Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE), is_constant(FALSE), optimizer(0), pushed_cond_guards(NULL), - exec_method(NOT_TRANSFORMED), upper_item(0) + in_strategy(0), upper_item(0) { DBUG_ENTER("Item_in_subselect::Item_in_subselect"); left_expr= left_exp; @@ -1488,7 +1487,6 @@ Item_in_subselect::single_value_transformer(JOIN *join) } /* fix fields is already called for left expression */ substitution= func->create(left_expr, subs); - is_min_max_optimized= TRUE; DBUG_RETURN(RES_OK); } @@ -1555,8 +1553,21 @@ Item_in_subselect::single_value_transformer(JOIN *join) } +bool Item_in_subselect::fix_having(Item *having, SELECT_LEX *select_lex) +{ + bool fix_res= 0; + if (!having->fixed) + { + select_lex->having_fix_field= 1; + fix_res= having->fix_fields(thd, 0); + select_lex->having_fix_field= 0; + } + return fix_res; +} + + /** - Transofrm an IN predicate into EXISTS via predicate injection. + Transform an IN predicate into EXISTS via predicate injection. @details The transformation injects additional predicates into the subquery (and makes the subquery correlated) as follows. @@ -1607,6 +1618,9 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join, DBUG_ENTER("Item_in_subselect::create_single_in_to_exists_cond"); + *where_item= NULL; + *having_item= NULL; + if (join_having || select_lex->with_sum_func || select_lex->group_list.elements) { @@ -1626,11 +1640,11 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join, item= new Item_func_trig_cond(item, get_cond_guard(0)); } - if (item->fix_fields(thd, 0)) + if (!join_having) + item->name= (char*) in_having_cond; + if (fix_having(item, select_lex)) DBUG_RETURN(RES_ERROR); - *having_item= item; - *where_item= NULL; } else { @@ -1651,10 +1665,9 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join, get_cond_guard(0)))) DBUG_RETURN(RES_ERROR); } - - if (having->fix_fields(thd, 0)) - DBUG_RETURN(RES_ERROR); - + having->name= (char*) in_having_cond; + if (fix_having(having, select_lex)) + DBUG_RETURN(RES_ERROR); *having_item= having; item= new Item_cond_or(item, @@ -1670,20 +1683,20 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join, DBUG_RETURN(RES_ERROR); } - if (item->fix_fields(thd, 0)) - DBUG_RETURN(RES_ERROR); - + /* + TODO: figure out why the following is done here in + single_value_transformer but there is no corresponding action in + row_value_transformer? + */ + item->name= (char *) in_additional_cond; + if (!item->fixed && item->fix_fields(thd, 0)) + DBUG_RETURN(RES_ERROR); *where_item= item; } else { if (select_lex->master_unit()->is_union()) { - /* - comparison functions can't be changed during fix_fields() - we can assign select_lex->having here, and pass 0 as last - argument (reference) to fix_fields() - */ Item *new_having= func->create(expr, new Item_ref_null_helper(&select_lex->context, this, @@ -1697,132 +1710,10 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join, DBUG_RETURN(RES_ERROR); } - if (new_having->fix_fields(thd, 0)) - DBUG_RETURN(RES_ERROR); - + new_having->name= (char*) in_having_cond; + if (fix_having(new_having, select_lex)) + DBUG_RETURN(RES_ERROR); *having_item= new_having; - *where_item= NULL; - } - else - { - DBUG_ASSERT(FALSE); - /* TIMOUR TODO */ - *having_item= NULL; - *where_item= (Item*) select_lex->item_list.head(); - } - } - } - - DBUG_RETURN(RES_OK); -} - - - -Item_subselect::trans_res -Item_in_subselect::inject_single_in_to_exists_cond(JOIN * join, - Item *where_item, - Item *having_item) -{ - SELECT_LEX *select_lex= join->select_lex; - /* - The non-transformed HAVING clause of 'join' may be stored differently in - JOIN::optimize: - this->tmp_having= this->having - this->having= 0; - */ - Item* join_having= join->having ? join->having : join->tmp_having; - bool fix_res= 0; - DBUG_ENTER("Item_in_subselect::inject_single_in_to_exists_cond"); - - if (join_having || select_lex->with_sum_func || - select_lex->group_list.elements) - { - /* - AND and comparison functions can't be changed during fix_fields() - we can assign select_lex->having here, and pass 0 as last - argument (reference) to fix_fields() - */ - thd->change_item_tree(&select_lex->having, and_items(join_having, having_item)); - join->having= select_lex->having; - if (join->having == having_item) - having_item->name= (char*)in_having_cond; - select_lex->having_fix_field= 1; - /* - we do not check join->having->fixed, because Item_and (from and_items) - or comparison function (from func->create) can't be fixed after creation - */ - if (!join->having->fixed) - fix_res= join->having->fix_fields(thd, 0); - select_lex->having_fix_field= 0; - if (fix_res) - DBUG_RETURN(RES_ERROR); - } - else - { - if (select_lex->table_list.elements) - { - if (!abort_on_null && select_lex->item_list.head()->maybe_null) - { - /* - Item_is_not_null_test can't be changed during fix_fields() - we can assign select_lex->having here, and pass 0 as last - argument (reference) to fix_fields() - */ - having_item->name= (char*)in_having_cond; - thd->change_item_tree(&select_lex->having, having_item); - join->having= select_lex->having; - select_lex->having_fix_field= 1; - /* - we do not check join->having->fixed, because Item_and (from - and_items) or comparison function (from func->create) can't be - fixed after creation - */ - if (!join->having->fixed) - fix_res= join->having->fix_fields(thd, 0); - select_lex->having_fix_field= 0; - if (fix_res) - DBUG_RETURN(RES_ERROR); - } - /* - TODO: figure out why the following is done here in - single_value_transformer but there is no corresponding action in - row_value_transformer? - */ - where_item->name= (char *)in_additional_cond; - - /* - AND can't be changed during fix_fields() - we can assign select_lex->having here, and pass 0 as last - argument (reference) to fix_fields() - */ - thd->change_item_tree(&select_lex->where, and_items(join->conds, where_item)); - join->conds= select_lex->where; - select_lex->where->top_level_item(); - /* - we do not check join->conds->fixed, because Item_and can't be fixed - after creation - */ - if (!join->conds->fixed && join->conds->fix_fields(thd, 0)) - DBUG_RETURN(RES_ERROR); - } - else - { - if (select_lex->master_unit()->is_union()) - { - having_item->name= (char*)in_having_cond; - thd->change_item_tree(&select_lex->having, having_item); - join->having= select_lex->having; - select_lex->having_fix_field= 1; - - /* - we do not check join->having->fixed, because comparison function - (from func->create) can't be fixed after creation - */ - if (!join->having->fixed) - fix_res= join->having->fix_fields(thd, 0); - select_lex->having_fix_field= 0; - if (fix_res) - DBUG_RETURN(RES_ERROR); } else DBUG_ASSERT(FALSE); @@ -1992,7 +1883,6 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join, item_having_part2->top_level_item(); } *having_item= and_items(*having_item, item_having_part2); - (*having_item)->top_level_item(); } else { @@ -2074,66 +1964,22 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join, } *where_item= and_items(*where_item, item); } - (*where_item)->fix_fields(thd, 0); } - DBUG_RETURN(RES_OK); -} - - -Item_subselect::trans_res -Item_in_subselect::inject_row_in_to_exists_cond(JOIN * join, - Item *where_item, - Item *having_item) -{ - SELECT_LEX *select_lex= join->select_lex; - /* - The non-transformed HAVING clause of 'join' may be stored differently in - JOIN::optimize: - this->tmp_having= this->having - this->having= 0; - */ - Item* join_having= join->having ? join->having : join->tmp_having; - bool is_having_used= (join_having || select_lex->with_sum_func || - select_lex->group_list.first || - !select_lex->table_list.elements); - - DBUG_ENTER("Item_in_subselect::inject_row_in_to_exists_cond"); - - if (!is_having_used) + if (*where_item) { - /* - AND can't be changed during fix_fields() - we can assign select_lex->where here, and pass 0 as last - argument (reference) to fix_fields() - */ - thd->change_item_tree(&select_lex->where, and_items(join->conds, where_item)); - join->conds= select_lex->where; - select_lex->where->top_level_item(); - if (!join->conds->fixed && join->conds->fix_fields(thd, 0)) + if (!(*where_item)->fixed && (*where_item)->fix_fields(thd, 0)) DBUG_RETURN(RES_ERROR); + (*where_item)->top_level_item(); } - if (having_item) + if (*having_item) { - bool res; - thd->change_item_tree(&select_lex->having, - and_items(join_having, having_item)); - join->having= select_lex->having; - if (having_item == select_lex->having) - having_item->name= (char*)in_having_cond; - select_lex->having->top_level_item(); - /* - AND can't be changed during fix_fields() - we can assign select_lex->having here, and pass 0 as last - argument (reference) to fix_fields() - */ - select_lex->having_fix_field= 1; - if (!join->having->fixed) - res= join->having->fix_fields(thd, 0); - select_lex->having_fix_field= 0; - if (res) + if (!join_having) + (*having_item)->name= (char*) in_having_cond; + if (fix_having(*having_item, select_lex)) DBUG_RETURN(RES_ERROR); + (*having_item)->top_level_item(); } DBUG_RETURN(RES_OK); @@ -2175,20 +2021,36 @@ bool Item_in_subselect::create_in_to_exists_cond(JOIN *join_arg) } -bool Item_in_subselect::inject_in_to_exists_cond(JOIN * join_arg) +bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg) { - Item_subselect::trans_res res; + SELECT_LEX *select_lex= join_arg->select_lex; + Item *where_item= join_arg->in_to_exists_where; + Item *having_item= join_arg->in_to_exists_having; - if (left_expr->cols() == 1) - res= inject_single_in_to_exists_cond(join_arg, - join_arg->in_to_exists_where, - join_arg->in_to_exists_having); - else - res= inject_row_in_to_exists_cond(join_arg, - join_arg->in_to_exists_where, - join_arg->in_to_exists_having); + DBUG_ENTER("Item_in_subselect::inject_in_to_exists_cond"); - return (res != RES_OK && res != RES_REDUCE); + if (where_item) + { + where_item= and_items(join_arg->conds, where_item); + if (!where_item->fixed && where_item->fix_fields(thd, 0)) + DBUG_RETURN(true); + thd->change_item_tree(&select_lex->where, where_item); + select_lex->where->top_level_item(); + join_arg->conds= select_lex->where; + } + + if (having_item) + { + Item* join_having= join_arg->having ? join_arg->having:join_arg->tmp_having; + having_item= and_items(join_having, having_item); + if (fix_having(having_item, select_lex)) + DBUG_RETURN(true); + thd->change_item_tree(&select_lex->having, having_item); + select_lex->having->top_level_item(); + join_arg->having= select_lex->having; + } + + DBUG_RETURN(false); } @@ -2301,7 +2163,7 @@ err: void Item_in_subselect::print(String *str, enum_query_type query_type) { - if (exec_method == IN_TO_EXISTS) + if (in_strategy & SUBS_IN_TO_EXISTS) str->append(STRING_WITH_LEN("")); else { @@ -2317,7 +2179,7 @@ bool Item_in_subselect::fix_fields(THD *thd_arg, Item **ref) uint outer_cols_num; List *inner_cols; - if (exec_method == SEMI_JOIN) + if (in_strategy & SUBS_SEMI_JOIN) return !( (*ref)= new Item_int(1)); /* @@ -2498,10 +2360,7 @@ bool Item_in_subselect::init_cond_guards() Callback to test if an IN predicate is expensive. @details - IN predicates are considered expensive only if they will be executed via - materialization. The return value affects the behavior of - make_cond_for_table() in such a way that it is unchanged when we use - the IN=>EXISTS transformation to compute IN. + The return value affects the behavior of make_cond_for_table(). @retval TRUE if the predicate is expensive @retval FALSE otherwise @@ -2509,7 +2368,8 @@ bool Item_in_subselect::init_cond_guards() bool Item_in_subselect::is_expensive_processor(uchar *arg) { - return TRUE; // exec_method == MATERIALIZATION; + /* TIMOUR: TODO: decide on a cost basis whether it is expensive or not. */ + return TRUE; } @@ -2517,7 +2377,7 @@ Item_subselect::trans_res Item_allany_subselect::select_transformer(JOIN *join) { DBUG_ENTER("Item_allany_subselect::select_transformer"); - exec_method= IN_TO_EXISTS; + in_strategy= SUBS_IN_TO_EXISTS; if (upper_item) upper_item->show= 1; DBUG_RETURN(select_in_like_transformer(join)); @@ -2526,7 +2386,7 @@ Item_allany_subselect::select_transformer(JOIN *join) void Item_allany_subselect::print(String *str, enum_query_type query_type) { - if (exec_method == IN_TO_EXISTS) + if (in_strategy & SUBS_IN_TO_EXISTS) str->append(STRING_WITH_LEN("")); else { diff --git a/sql/item_subselect.h b/sql/item_subselect.h index d35d036e9ea..308fbc6b20a 100644 --- a/sql/item_subselect.h +++ b/sql/item_subselect.h @@ -108,9 +108,6 @@ public: /* subquery is transformed */ bool changed; - /* TIMOUR: this is temporary, remove it. */ - bool is_min_max_optimized; - /* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */ bool is_correlated; @@ -121,6 +118,12 @@ public: Item_subselect(); virtual subs_type substype() { return UNKNOWN_SUBS; } + bool is_in_predicate() + { + return (substype() == Item_subselect::IN_SUBS || + substype() == Item_subselect::ALL_SUBS || + substype() == Item_subselect::ANY_SUBS); + } /* We need this method, because some compilers do not allow 'this' @@ -314,6 +317,18 @@ public: }; +/* + Possible methods to execute an IN predicate. These are set by the optimizer + based on user-set optimizer switches, syntactic analysis and cost comparison. +*/ +#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */ +#define SUBS_SEMI_JOIN 1 /* IN was converted to semi-join. */ +#define SUBS_IN_TO_EXISTS 2 /* IN was converted to correlated EXISTS. */ +#define SUBS_MATERIALIZATION 4 /* Execute IN via subquery materialization. */ +/* Partial matching substrategies of MATERIALIZATION. */ +#define SUBS_PARTIAL_MATCH_ROWID_MERGE 8 +#define SUBS_PARTIAL_MATCH_TABLE_SCAN 16 + /** Representation of IN subquery predicates of the form "left_expr IN (SELECT ...)". @@ -362,19 +377,13 @@ protected: trans_res select_in_like_transformer(JOIN *join); trans_res single_value_transformer(JOIN *join); trans_res row_value_transformer(JOIN * join); + bool fix_having(Item *having, st_select_lex *select_lex); trans_res create_single_in_to_exists_cond(JOIN * join, Item **where_item, Item **having_item); - trans_res inject_single_in_to_exists_cond(JOIN * join, - Item *where_item, - Item *having_item); - trans_res create_row_in_to_exists_cond(JOIN * join, Item **where_item, Item **having_item); - trans_res inject_row_in_to_exists_cond(JOIN * join, - Item *where_item, - Item *having_item); public: Item *left_expr; /* Priority of this predicate in the convert-to-semi-join-nest process. */ @@ -407,14 +416,8 @@ public: */ bool sjm_scan_allowed; - /* The method chosen to execute the IN predicate. */ - enum enum_exec_method { - NOT_TRANSFORMED, /* No execution method was chosen for this IN. */ - SEMI_JOIN, /* IN was converted to semi-join nest and should be removed. */ - IN_TO_EXISTS, /* IN was converted to correlated EXISTS. */ - MATERIALIZATION /* IN will be executed via subquery materialization. */ - }; - enum_exec_method exec_method; + /* A bitmap of possible execution strategies for an IN predicate. */ + uchar in_strategy; bool *get_cond_guard(int i) { @@ -433,7 +436,7 @@ public: Item_in_subselect() :Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE), is_constant(FALSE), optimizer(0), abort_on_null(0), - pushed_cond_guards(NULL), func(NULL), exec_method(NOT_TRANSFORMED), + pushed_cond_guards(NULL), func(NULL), in_strategy(0), upper_item(0) {} void cleanup(); @@ -446,8 +449,8 @@ public: was_null= 0; } trans_res select_transformer(JOIN *join); - bool create_in_to_exists_cond(JOIN * join_arg); - bool inject_in_to_exists_cond(JOIN * join_arg); + bool create_in_to_exists_cond(JOIN *join_arg); + bool inject_in_to_exists_cond(JOIN *join_arg); virtual bool exec(); longlong val_int(); diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index ba60bab9b50..cc2740c6e8b 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -563,20 +563,20 @@ protected: #define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4 #define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8 #define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 16 - #define OPTIMIZER_SWITCH_FIRSTMATCH 32 #define OPTIMIZER_SWITCH_LOOSE_SCAN 64 #define OPTIMIZER_SWITCH_MATERIALIZATION 128 -#define OPTIMIZER_SWITCH_SEMIJOIN 256 -#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512 -#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024 -#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<11) +#define OPTIMIZER_SWITCH_IN_TO_EXISTS 256 +#define OPTIMIZER_SWITCH_SEMIJOIN 512 +#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 1024 +#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN (1<<11) +#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<12) #ifdef DBUG_OFF -# define OPTIMIZER_SWITCH_LAST (1<<12) -#else -# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<12) # define OPTIMIZER_SWITCH_LAST (1<<13) +#else +# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<13) +# define OPTIMIZER_SWITCH_LAST (1<<14) #endif #ifdef DBUG_OFF @@ -589,6 +589,7 @@ protected: OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_MATERIALIZATION | \ + OPTIMIZER_SWITCH_IN_TO_EXISTS | \ OPTIMIZER_SWITCH_SEMIJOIN | \ OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\ OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\ @@ -603,6 +604,7 @@ protected: OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_MATERIALIZATION | \ + OPTIMIZER_SWITCH_IN_TO_EXISTS | \ OPTIMIZER_SWITCH_SEMIJOIN | \ OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\ OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\ diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 1593a584454..44fcd76ce47 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -304,7 +304,7 @@ static const char *optimizer_switch_names[]= "index_merge","index_merge_union","index_merge_sort_union", "index_merge_intersection", "index_condition_pushdown", - "firstmatch","loosescan","materialization", "semijoin", + "firstmatch","loosescan","materialization","in_to_exists","semijoin", "partial_match_rowid_merge", "partial_match_table_scan", "subquery_cache", @@ -325,6 +325,7 @@ static const unsigned int optimizer_switch_names_len[]= sizeof("firstmatch") - 1, sizeof("loosescan") - 1, sizeof("materialization") - 1, + sizeof("in_to_exists") - 1, sizeof("semijoin") - 1, sizeof("partial_match_rowid_merge") - 1, sizeof("partial_match_table_scan") - 1, @@ -412,9 +413,10 @@ static const char *optimizer_switch_str="index_merge=on,index_merge_union=on," "firstmatch=on," "loosescan=on," "materialization=on," + "in_to_exists=on," "semijoin=on," "partial_match_rowid_merge=on," - "partial_match_table_scan=on," + "partial_match_table_scan=on," "subquery_cache=on" #ifndef DBUG_OFF ",table_elimination=on"; @@ -7233,7 +7235,7 @@ The minimum value for this variable is 4096.", {"optimizer_switch", OPT_OPTIMIZER_SWITCH, "optimizer_switch=option=val[,option=val...], where option={index_merge, " "index_merge_union, index_merge_sort_union, index_merge_intersection, " - "index_condition_pushdown, firstmatch, loosescan, materialization, " + "index_condition_pushdown, firstmatch, loosescan, materialization, in_to_exists, " "semijoin, partial_match_rowid_merge, partial_match_table_scan, " "subquery_cache" #ifndef DBUG_OFF diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 0c7746ed72a..e93478c5195 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -165,7 +165,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join) thd->thd_marker.emb_on_expr_nest && // 5 select_lex->outer_select()->join && // 6 parent_unit->first_select()->leaf_tables && // 7 - in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED && // 8 + !in_subs->in_strategy && // 8 select_lex->outer_select()->leaf_tables && // 9 !((join->select_options | // 10 select_lex->outer_select()->join->select_options) // 10 @@ -185,65 +185,71 @@ int check_and_do_in_subquery_rewrites(JOIN *join) else { DBUG_PRINT("info", ("Subquery can't be converted to semi-join")); - /* - Check if the subquery predicate can be executed via materialization. - The required conditions are: - 1. Subquery predicate is an IN/=ANY subq predicate - 2. Subquery is a single SELECT (not a UNION) - 3. Subquery is not a table-less query. In this case there is no - point in materializing. - 3A The upper query is not a table-less SELECT ... FROM DUAL. We + if (!optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) && + !optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION)) + my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0)); + + if (in_subs) + { + /* Subquery predicate is an IN/=ANY predicate. */ + if (optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS)) + in_subs->in_strategy|= SUBS_IN_TO_EXISTS; + if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION)) + in_subs->in_strategy|= SUBS_MATERIALIZATION; + + /* + Check if the subquery predicate can be executed via materialization. + The required conditions are: + 1. Subquery is a single SELECT (not a UNION) + 2. Subquery is not a table-less query. In this case there is no + point in materializing. + 2A The upper query is not a table-less SELECT ... FROM DUAL. We can't do materialization for SELECT .. FROM DUAL because it does not call setup_subquery_materialization(). We could make SELECT ... FROM DUAL call that function but that doesn't seem to be the case that is worth handling. - 4. Either the subquery predicate is a top-level predicate, or at - least one partial match strategy is enabled. If no partial match - strategy is enabled, then materialization cannot be used for - non-top-level queries because it cannot handle NULLs correctly. - 5. Subquery is non-correlated - TODO: - This is an overly restrictive condition. It can be extended to: - (Subquery is non-correlated || - Subquery is correlated to any query outer to IN predicate || - (Subquery is correlated to the immediate outer query && - Subquery !contains {GROUP BY, ORDER BY [LIMIT], - aggregate functions}) && subquery predicate is not under "NOT IN")) - 6. No execution method was already chosen (by a prepared statement). + 3. Either the subquery predicate is a top-level predicate, or at + least one partial match strategy is enabled. If no partial match + strategy is enabled, then materialization cannot be used for + non-top-level queries because it cannot handle NULLs correctly. + 4. Subquery is non-correlated + TODO: + This is an overly restrictive condition. It can be extended to: + (Subquery is non-correlated || + Subquery is correlated to any query outer to IN predicate || + (Subquery is correlated to the immediate outer query && + Subquery !contains {GROUP BY, ORDER BY [LIMIT], + aggregate functions}) && subquery predicate is not under "NOT IN")) - (*) The subquery must be part of a SELECT statement. The current - condition also excludes multi-table update statements. - - Determine whether we will perform subquery materialization before - calling the IN=>EXISTS transformation, so that we know whether to - perform the whole transformation or only that part of it which wraps - Item_in_subselect in an Item_in_optimizer. - */ - if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION) && - in_subs && // 1 - !select_lex->is_part_of_union() && // 2 - parent_unit->first_select()->leaf_tables && // 3 - thd->lex->sql_command == SQLCOM_SELECT && // * - select_lex->outer_select()->leaf_tables && // 3A - subquery_types_allow_materialization(in_subs) && - // psergey-todo: duplicated_subselect_card_check: where it's done? - (in_subs->is_top_level_item() || - optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || - optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) &&//4 - !in_subs->is_correlated && // 5 - in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6 - { - /* - Materialization is possible, later the optimize phase of each - subquery will choose either materialization or in-to-exists based - on cost. + (*) The subquery must be part of a SELECT statement. The current + condition also excludes multi-table update statements. */ - in_subs->exec_method= Item_in_subselect::MATERIALIZATION; - } - else if (in_subs) - { - /* Materialization is not possible at all. */ - in_subs->exec_method= Item_in_subselect::IN_TO_EXISTS; + if (!(in_subs->in_strategy & SUBS_MATERIALIZATION && + !select_lex->is_part_of_union() && // 1 + parent_unit->first_select()->leaf_tables && // 2 + thd->lex->sql_command == SQLCOM_SELECT && // * + select_lex->outer_select()->leaf_tables && // 2A + subquery_types_allow_materialization(in_subs) && + // psergey-todo: duplicated_subselect_card_check: where it's done? + (in_subs->is_top_level_item() || //3 + optimizer_flag(thd, + OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || //3 + optimizer_flag(thd, + OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) && //3 + !in_subs->is_correlated)) //4 + { + /* Materialization is not possible based on syntactic properties. */ + in_subs->in_strategy&= ~SUBS_MATERIALIZATION; + } + + if (!in_subs->in_strategy) + { + /* + If neither materialization is possible, nor the user chose + IN-TO-EXISTS, choose IN-TO-EXISTS as the only universal strategy. + */ + in_subs->in_strategy|= SUBS_IN_TO_EXISTS; + } } /* @@ -533,13 +539,13 @@ skip_conversion: } /* Revert to the IN->EXISTS strategy in the rare case when the subquery could - be flattened. + not be flattened. TODO: This is a limitation done for simplicity. Such subqueries could also be executed via materialization. In order to determine this, we should re-run the test for materialization that was done in check_and_do_in_subquery_rewrites. */ - (*in_subq)->exec_method= Item_in_subselect::IN_TO_EXISTS; + (*in_subq)->in_strategy= SUBS_IN_TO_EXISTS; } if (arena) @@ -800,8 +806,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) /* 3. Remove the original subquery predicate from the WHERE/ON */ // The subqueries were replaced for Item_int(1) earlier - subq_pred->exec_method= - Item_in_subselect::SEMI_JOIN; // for subsequent executions + subq_pred->in_strategy= SUBS_SEMI_JOIN; // for subsequent executions /*TODO: also reset the 'with_subselect' there. */ /* n. Adjust the parent_join->tables counter */ @@ -3393,6 +3398,7 @@ int rewrite_to_index_subquery_engine(JOIN *join) JOIN_TAB* join_tab=join->join_tab; SELECT_LEX_UNIT *unit= join->unit; DBUG_ENTER("rewrite_to_index_subquery_engine"); + /* is this simple IN subquery? */ @@ -3564,56 +3570,157 @@ bool JOIN::optimize_unflattened_subqueries() } -bool JOIN::choose_subquery_plan() +bool JOIN::choose_subquery_plan(table_map join_tables) { - double mat_strategy_cost; /* The cost to compute IN via materialization. */ - double in_exists_strategy_cost; /* The cost of the IN->EXISTS strategy. */ - bool res; + /* The original QEP of the subquery. */ + DYNAMIC_ARRAY save_keyuse; /* Copy of the JOIN::keyuse array. */ + POSITION save_best_positions[MAX_TABLES+1]; /* Copy of JOIN::best_positions */ + /* Copies of the JOIN_TAB::keyuse pointers for each JOIN_TAB. */ + KEYUSE *save_join_tab_keyuse[MAX_TABLES]; + /* Copies of JOIN_TAB::checked_keys for each JOIN_TAB. */ + key_map save_join_tab_checked_keys[MAX_TABLES]; - DBUG_ASSERT(in_to_exists_where || in_to_exists_having); - DBUG_ASSERT(select_lex->master_unit()->item && - (select_lex->master_unit()->item->substype() == - Item_subselect::IN_SUBS || - select_lex->master_unit()->item->substype() == - Item_subselect::ALL_SUBS || - select_lex->master_unit()->item->substype() == - Item_subselect::ANY_SUBS)); + bool in_exists_reoptimized= false; + Item_in_subselect *in_subs; - Item_in_subselect *in_subs= (Item_in_subselect*) - select_lex->master_unit()->item; - - /* Always revert to IN->EXISTS. */ - mat_strategy_cost= 0; - in_exists_strategy_cost= 1; - - /* - If (1) materialization is a possible strategy based on static analysis, and - (2) it is cheaper strategy than the IN->EXISTS transformation, then compute - in_subs via the materialization trategy. - */ - if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION && // 1 - mat_strategy_cost < in_exists_strategy_cost) // 2 + if (select_lex->master_unit()->item && + select_lex->master_unit()->item->is_in_predicate()) { - if (in_subs->setup_mat_engine()) - { - /* - In some cases it is not possible to create usable indexes for the - materialization strategy, so fall back to IN->EXISTS. - */ - in_subs->exec_method= Item_in_subselect::IN_TO_EXISTS; - } + in_subs= (Item_in_subselect*) select_lex->master_unit()->item; + if (in_subs->create_in_to_exists_cond(this)) + return true; } else + return false; + + DBUG_ASSERT(in_subs->in_strategy); /* A strategy must be chosen earlier. */ + DBUG_ASSERT(in_to_exists_where || in_to_exists_having); + DBUG_ASSERT(!in_to_exists_where || in_to_exists_where->fixed); + DBUG_ASSERT(!in_to_exists_having || in_to_exists_having->fixed); + + save_keyuse.elements= 0; + save_keyuse.buffer= NULL; + + /* + Compute and compare the costs of materialization and in-exists if both + strategies are possible and allowed by the user (checked during the prepare + phase. + */ + if (in_subs->in_strategy & SUBS_MATERIALIZATION && + in_subs->in_strategy & SUBS_IN_TO_EXISTS) { + JOIN *outer_join= unit->outer_select() ? unit->outer_select()->join : NULL; + JOIN *inner_join= this; + /* Cost of the outer JOIN. */ + double outer_read_time= 0, outer_record_count= 0; + /* Cost of the unmodified subquery. */ + double inner_read_time_1= 0, inner_record_count_1= 0; + /* Cost of the subquery with injected IN-EXISTS predicates. */ + double inner_read_time_2= 0, inner_record_count_2= 0; + /* The cost to compute IN via materialization. */ + double materialize_strategy_cost= 0; + /* The cost of the IN->EXISTS strategy. */ + double in_exists_strategy_cost= 1; + + if (outer_join) + get_partial_join_cost(outer_join, outer_join->tables, + &outer_read_time, &outer_record_count); + else + { + /* TODO: outer_join can be NULL for DELETE statements. */ + outer_read_time= 1; /* TODO */ + outer_record_count= 1; /* TODO */ + } + + get_partial_join_cost(inner_join, inner_join->tables, + &inner_read_time_1, &inner_record_count_1); + + if (in_to_exists_where && const_tables != tables) + { + /* + Re-optimize and cost the subquery taking into account the IN-EXISTS + conditions. + */ + if (save_query_plan(&save_keyuse, save_best_positions, + save_join_tab_keyuse, save_join_tab_checked_keys) || + reoptimize(in_to_exists_where, join_tables, save_best_positions)) + return TRUE; + in_exists_reoptimized= true; + + get_partial_join_cost(inner_join, inner_join->tables, + &inner_read_time_2, &inner_record_count_2); + } + else + { + /* Reoptimization would not produce any better plan. */ + inner_read_time_2= inner_read_time_1; + } + + /* Compute execution costs. */ /* - Previous optimizer phases should have chosen either a materialization - or IN->EXISTS strategy. + 1. Compute the cost of the materialization strategy. */ - DBUG_ASSERT(in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS); + double materialization_cost; /* The cost of executing the subquery and */ + /* storing its result in an indexed temp table.*/ + /* The cost of a lookup into the unique index of the materialized table. */ + double lookup_cost; + double write_row_cost= 1; /* TODO: what is the real cost to write a row? */ + materialization_cost= inner_read_time_1 + + inner_record_count_1 * write_row_cost; + /* + The cost of a hash/btree lookup into a unique index of a materialized + subquery. + TIMOUR: TODO: the block of code below is exact copy/paste from + opt_subselect.cc:optimize_semi_join_nests() - refactor it. + */ + uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list); + if (rowlen * inner_record_count_1 < thd->variables.max_heap_table_size) + lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST; + else + lookup_cost= DISK_TEMPTABLE_LOOKUP_COST; + materialize_strategy_cost= materialization_cost + + outer_record_count * lookup_cost; + + /* + 2. Compute the cost of the IN=>EXISTS strategy. + */ + in_exists_strategy_cost= outer_record_count * inner_read_time_2; + + /* Compare the costs and choose the cheaper strategy. */ + if (materialize_strategy_cost >= in_exists_strategy_cost) + in_subs->in_strategy&= ~SUBS_MATERIALIZATION; + else + in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS; } - if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION) + /* + If (1) materialization is a possible strategy based on static analysis + during the prepare phase, then if + (2) it is more expensive than the IN->EXISTS transformation, and + (3) it is not possible to create usable indexes for the materialization + strategy, + fall back to IN->EXISTS. + otherwise use materialization. + */ + if (in_subs->in_strategy & SUBS_MATERIALIZATION && + in_subs->setup_mat_engine()) { + /* + If materialization was the cheaper or the only user-selected strategy, + but it is not possible to execute it due to limitations in the + implementation, fall back to IN-TO-EXISTS. + */ + in_subs->in_strategy&= ~SUBS_MATERIALIZATION; + in_subs->in_strategy|= SUBS_IN_TO_EXISTS; + } + + if (in_subs->in_strategy & SUBS_MATERIALIZATION) + { + /* Restore orginal query plan used for materialization. */ + if (in_exists_reoptimized) + restore_query_plan(&save_keyuse, save_best_positions, + save_join_tab_keyuse, save_join_tab_checked_keys); + /* TODO: should we set/unset this flag for both select_lex and its unit? */ in_subs->unit->uncacheable&= ~UNCACHEABLE_DEPENDENT; select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT; @@ -3633,13 +3740,32 @@ bool JOIN::choose_subquery_plan() set in the beginning of JOIN::optimize, which was already done. */ select_limit= in_subs->unit->select_limit_cnt; - - // TODO: inner_join->restore_plan (keyuse, best_positions, best_read) } - else if (in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS) - res= in_subs->inject_in_to_exists_cond(this); + else if (in_subs->in_strategy & SUBS_IN_TO_EXISTS) + { + /* Keep the new query plan with injected conditions, delete the old one. */ + if (save_keyuse.elements) + { + DBUG_ASSERT(in_exists_reoptimized); + delete_dynamic(&save_keyuse); + } + + if (!in_exists_reoptimized && in_to_exists_where && const_tables != tables) + { + for (uint i= 0; i < tables; i++) + { + join_tab[i].keyuse= NULL; + join_tab[i].checked_keys.clear_all(); + } + if (reoptimize(in_to_exists_where, join_tables, NULL)) + return TRUE; + } + + if (in_subs->inject_in_to_exists_cond(this)) + return TRUE; + } else DBUG_ASSERT(FALSE); - return res; + return FALSE; } diff --git a/sql/share/errmsg.txt b/sql/share/errmsg.txt index 117a4b8e1e6..2152fdbe012 100644 --- a/sql/share/errmsg.txt +++ b/sql/share/errmsg.txt @@ -6245,3 +6245,5 @@ ER_UNKNOWN_OPTION eng "Unknown option '%-.64s'" ER_BAD_OPTION_VALUE eng "Incorrect value '%-.64s' for option '%-.64s'" +ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES + eng "At least one of the 'in_to_exists' or 'materialization' optimizer_switch flags must be 'on'." diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index d9ceb431bb3..bfd24a2491c 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -2157,8 +2157,8 @@ void st_select_lex::print_limit(THD *thd, select_limit == 1, and there should be no offset_limit. */ (((subs_type == Item_subselect::IN_SUBS) && - ((Item_in_subselect*)item)->exec_method == - Item_in_subselect::MATERIALIZATION) ? + ((Item_in_subselect*)item)->in_strategy & + SUBS_MATERIALIZATION) ? TRUE : (select_limit->val_int() == 1LL) && offset_limit == 0)); @@ -3096,25 +3096,11 @@ bool st_select_lex::optimize_unflattened_subqueries() Item_subselect *subquery_predicate= un->item; if (subquery_predicate) { - Item_in_subselect *item_in= NULL; - if (subquery_predicate->substype() == Item_subselect::IN_SUBS || - subquery_predicate->substype() == Item_subselect::ALL_SUBS || - subquery_predicate->substype() == Item_subselect::ANY_SUBS) - item_in= (Item_in_subselect*) subquery_predicate; for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select()) { JOIN *inner_join= sl->join; SELECT_LEX *save_select= un->thd->lex->current_select; int res; - - /* - Make sure that we do not create IN->EXISTS conditions for - subquery predicates that were substituted by Item_maxmin_subselect - or by Item_singlerow_subselect. - */ - DBUG_ASSERT(!item_in || (item_in && !item_in->is_min_max_optimized)); - if (item_in && item_in->create_in_to_exists_cond(inner_join)) - return TRUE; /* We need only 1 row to determine existence */ un->set_limit(un->global_parameters); un->thd->lex->current_select= sl; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 26ec89c7a47..800458e988b 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -53,9 +53,9 @@ static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, COND *conds, static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse, JOIN_TAB *join_tab, uint tables, COND *conds, - COND_EQUAL *cond_equal, table_map table_map, SELECT_LEX *select_lex, st_sargable_param **sargables); +static bool sort_and_filter_keyuse(DYNAMIC_ARRAY *keyuse); static int sort_keyuse(KEYUSE *a,KEYUSE *b); static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse, table_map used_tables); @@ -928,24 +928,29 @@ JOIN::optimize() error= 0; if (optimize_unflattened_subqueries()) DBUG_RETURN(1); - if (in_to_exists_where || in_to_exists_having) - { - /* - TIMOUR: TODO: refactor this block and JOIN::choose_subquery_plan - */ - Item_in_subselect *in_subs= (Item_in_subselect*) - select_lex->master_unit()->item; + /* + TIMOUR: TODO: consider do we need to optimize here at all and refactor + this block and JOIN::choose_subquery_plan. - if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION) - ; // setup materialized execution structures - else if (in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS) + if (choose_subquery_plan()) + DBUG_RETURN(1); + */ + Item_in_subselect *in_subs; + if (select_lex->master_unit()->item && + select_lex->master_unit()->item->is_in_predicate()) + { + in_subs= (Item_in_subselect*) select_lex->master_unit()->item; + if (in_subs->in_strategy & SUBS_MATERIALIZATION && + in_subs->setup_mat_engine()) + in_subs->in_strategy= SUBS_IN_TO_EXISTS; + if (in_subs->in_strategy & SUBS_IN_TO_EXISTS) { + if (in_subs->create_in_to_exists_cond(this)) + DBUG_RETURN(1); if (in_subs->inject_in_to_exists_cond(this)) DBUG_RETURN(1); tmp_having= having; } - else - DBUG_ASSERT(FALSE); } DBUG_RETURN(0); } @@ -2828,10 +2833,14 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, } if (conds || outer_join) + { if (update_ref_and_keys(join->thd, keyuse_array, stat, join->tables, - conds, join->cond_equal, - ~outer_join, join->select_lex, &sargables)) + conds, ~outer_join, join->select_lex, &sargables)) goto error; + if (keyuse_array->elements && sort_and_filter_keyuse(keyuse_array)) + goto error; + DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array);); + } join->const_table_map= no_rows_const_tables; join->const_tables= const_count; @@ -3135,8 +3144,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, sizeof(POSITION)*join->const_tables); join->best_read=1.0; } - if ((join->in_to_exists_where || join->in_to_exists_having) - && join->choose_subquery_plan()) + if (join->choose_subquery_plan(all_table_map & ~join->const_table_map)) goto error; /* Generate an execution plan from the found optimal join order. */ @@ -4089,11 +4097,10 @@ static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table, static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab, - uint tables, COND *cond, COND_EQUAL *cond_equal, - table_map normal_tables, SELECT_LEX *select_lex, - SARGABLE_PARAM **sargables) + uint tables, COND *cond, table_map normal_tables, + SELECT_LEX *select_lex, SARGABLE_PARAM **sargables) { - uint and_level,i,found_eq_constant; + uint and_level,i; KEY_FIELD *key_fields, *end, *field; uint sz; uint m= max(select_lex->max_equal_elems,1); @@ -4189,67 +4196,76 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab, return TRUE; } - /* - Sort the array of possible keys and remove the following key parts: - - ref if there is a keypart which is a ref and a const. - (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d, - then we skip the key part corresponding to b=t2.d) - - keyparts without previous keyparts - (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is - used in the query, we drop the partial key parts from consideration). - Special treatment for ft-keys. - */ - if (keyuse->elements) - { - KEYUSE key_end,*prev,*save_pos,*use; - - my_qsort(keyuse->buffer,keyuse->elements,sizeof(KEYUSE), - (qsort_cmp) sort_keyuse); - - bzero((char*) &key_end,sizeof(key_end)); /* Add for easy testing */ - if (insert_dynamic(keyuse,(uchar*) &key_end)) - return TRUE; - - use=save_pos=dynamic_element(keyuse,0,KEYUSE*); - prev= &key_end; - found_eq_constant=0; - for (i=0 ; i < keyuse->elements-1 ; i++,use++) - { - if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL) - use->table->const_key_parts[use->key]|= use->keypart_map; - if (use->keypart != FT_KEYPART) - { - if (use->key == prev->key && use->table == prev->table) - { - if (prev->keypart+1 < use->keypart || - (prev->keypart == use->keypart && found_eq_constant)) - continue; /* remove */ - } - else if (use->keypart != 0) // First found must be 0 - continue; - } - -#ifdef HAVE_valgrind - /* Valgrind complains about overlapped memcpy when save_pos==use. */ - if (save_pos != use) -#endif - *save_pos= *use; - prev=use; - found_eq_constant= !use->used_tables; - /* Save ptr to first use */ - if (!use->table->reginfo.join_tab->keyuse) - use->table->reginfo.join_tab->keyuse=save_pos; - use->table->reginfo.join_tab->checked_keys.set_bit(use->key); - save_pos++; - } - i=(uint) (save_pos-(KEYUSE*) keyuse->buffer); - VOID(set_dynamic(keyuse,(uchar*) &key_end,i)); - keyuse->elements=i; - } - DBUG_EXECUTE("opt", print_keyuse_array(keyuse);); return FALSE; } + +/** + Sort the array of possible keys and remove the following key parts: + - ref if there is a keypart which is a ref and a const. + (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d, + then we skip the key part corresponding to b=t2.d) + - keyparts without previous keyparts + (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is + used in the query, we drop the partial key parts from consideration). + Special treatment for ft-keys. +*/ + +static bool sort_and_filter_keyuse(DYNAMIC_ARRAY *keyuse) +{ + KEYUSE key_end, *prev, *save_pos, *use; + uint found_eq_constant, i; + + DBUG_ASSERT(keyuse->elements); + + my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE), + (qsort_cmp) sort_keyuse); + + bzero((char*) &key_end, sizeof(key_end)); /* Add for easy testing */ + if (insert_dynamic(keyuse, (uchar*) &key_end)) + return TRUE; + + use= save_pos= dynamic_element(keyuse,0,KEYUSE*); + prev= &key_end; + found_eq_constant= 0; + + for (i=0 ; i < keyuse->elements-1 ; i++,use++) + { + if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL) + use->table->const_key_parts[use->key]|= use->keypart_map; + if (use->keypart != FT_KEYPART) + { + if (use->key == prev->key && use->table == prev->table) + { + if (prev->keypart+1 < use->keypart || + (prev->keypart == use->keypart && found_eq_constant)) + continue; /* remove */ + } + else if (use->keypart != 0) // First found must be 0 + continue; + } + +#ifdef HAVE_valgrind + /* Valgrind complains about overlapped memcpy when save_pos==use. */ + if (save_pos != use) +#endif + *save_pos= *use; + prev= use; + found_eq_constant= !use->used_tables; + /* Save ptr to first use */ + if (!use->table->reginfo.join_tab->keyuse) + use->table->reginfo.join_tab->keyuse=save_pos; + use->table->reginfo.join_tab->checked_keys.set_bit(use->key); + save_pos++; + } + i= (uint) (save_pos-(KEYUSE*) keyuse->buffer); + VOID(set_dynamic(keyuse,(uchar*) &key_end,i)); + keyuse->elements= i; + + return FALSE; +} + + /** Update some values in keyuse for faster choose_plan() loop. */ @@ -19265,6 +19281,131 @@ bool JOIN::change_result(select_result *res) DBUG_RETURN(FALSE); } + +/** + Save the original query execution plan so that the caller can revert to it + if needed. +*/ +int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse, + POSITION *save_best_positions, + KEYUSE **save_join_tab_keyuse, + key_map *save_join_tab_checked_keys) +{ + if (keyuse.elements) + { + DYNAMIC_ARRAY tmp_keyuse; + if (my_init_dynamic_array(save_keyuse, sizeof(KEYUSE), 20, 64)) + return 1; + /* Swap the current and the backup keyuse arrays. */ + tmp_keyuse= keyuse; + keyuse= (*save_keyuse); + (*save_keyuse)= tmp_keyuse; + + for (uint i= 0; i < tables; i++) + { + save_join_tab_keyuse[i]= join_tab[i].keyuse; + join_tab[i].keyuse= NULL; + save_join_tab_checked_keys[i]= join_tab[i].checked_keys; + join_tab[i].checked_keys.clear_all(); + } + } + memcpy((uchar*) save_best_positions, (uchar*) best_positions, + sizeof(POSITION) * (tables + 1)); + memset(best_positions, 0, sizeof(POSITION) * (tables + 1)); + return 0; +} + + +/** + Restore the query plan saved before reoptimization with additional + conditions. +*/ + +void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse, + POSITION *save_best_positions, + KEYUSE **save_join_tab_keyuse, + key_map *save_join_tab_checked_keys) +{ + if (save_keyuse->elements) + { + DYNAMIC_ARRAY tmp_keyuse; + tmp_keyuse= keyuse; + keyuse= (*save_keyuse); + (*save_keyuse)= tmp_keyuse; + delete_dynamic(save_keyuse); + + for (uint i= 0; i < tables; i++) + { + join_tab[i].keyuse= save_join_tab_keyuse[i]; + join_tab[i].checked_keys= save_join_tab_checked_keys[i]; + } + + } + memcpy((uchar*) best_positions, (uchar*) save_best_positions, + sizeof(POSITION) * (tables + 1)); +} + + +/** + Reoptimize a query plan taking into account an additional conjunct to the + WHERE clause. +*/ + +int JOIN::reoptimize(Item *added_where, table_map join_tables, + POSITION *save_best_positions) +{ + DYNAMIC_ARRAY added_keyuse; + SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */ + + if (my_init_dynamic_array(&added_keyuse, sizeof(KEYUSE), 20, 64)) + { + delete_dynamic(&added_keyuse); + return 1; + } + + /* Re-run the REF optimizer to take into account the new conditions. */ + if (update_ref_and_keys(thd, &added_keyuse, join_tab, tables, added_where, + ~outer_join, select_lex, &sargables)) + { + delete_dynamic(&added_keyuse); + return 1; + } + + if (!added_keyuse.elements) + { + /* No need to optimize if no new access methods were discovered. */ + if (save_best_positions) + memcpy((uchar*) best_positions, (uchar*) save_best_positions, + sizeof(POSITION) * (tables + 1)); + delete_dynamic(&added_keyuse); + return 0; + } + + /* Add the new access methods to the keyuse array. */ + if (!keyuse.buffer && + my_init_dynamic_array(&keyuse, sizeof(KEYUSE), 20, 64)) + { + delete_dynamic(&added_keyuse); + return 1; + } + allocate_dynamic(&keyuse, keyuse.elements + added_keyuse.elements); + memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element, + added_keyuse.buffer, + (size_t) added_keyuse.elements * added_keyuse.size_of_element); + keyuse.elements+= added_keyuse.elements; + delete_dynamic(&added_keyuse); + + if (sort_and_filter_keyuse(&keyuse)) + return 1; + optimize_keyuse(this, &keyuse); + + /* Re-run the join optimizer to compute a new query plan. */ + if (choose_plan(this, join_tables)) + return 1; + + return 0; +} + /** @} (end of group Query_Optimizer) */ diff --git a/sql/sql_select.h b/sql/sql_select.h index c87147f6106..5f1abcce87f 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1369,8 +1369,21 @@ inline bool sj_is_materialize_strategy(uint strategy) class JOIN :public Sql_alloc { +private: JOIN(const JOIN &rhs); /**< not implemented */ JOIN& operator=(const JOIN &rhs); /**< not implemented */ + +protected: + /* Support for plan reoptimization with rewritten conditions. */ + int reoptimize(Item *added_where, table_map join_tables, + POSITION *save_best_positions); + int save_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions, + KEYUSE **save_join_tab_keyuse, + key_map *save_join_tab_checked_keys); + void restore_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions, + KEYUSE **save_join_tab_keyuse, + key_map *save_join_tab_checked_keys); + public: JOIN_TAB *join_tab,**best_ref; JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs @@ -1746,7 +1759,7 @@ public: NULL : join_tab+const_tables; } bool setup_subquery_caches(); - bool choose_subquery_plan(); + bool choose_subquery_plan(table_map join_tables); private: /** TRUE if the query contains an aggregate function but has no GROUP