1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation

Phase 3: Implementation of re-optimization of subqueries with injected predicates
           and cost comparison between Materialization and IN->EXISTS strategies.

The commit contains the following known problems:
- The implementation of EXPLAIN has not been re-engineered to reflect the
  changes in subquery optimization. EXPLAIN for subqueries is called during
  the execute phase, which results in different code paths during JOIN::optimize
  and thus in differing EXPLAIN messages for constant/system tables.
- There are some valgrind warnings that need investigation
- Several EXPLAINs with minor differences need to be reconsidered after fixing
  the EXPLAIN problem above.

This patch also adds one extra optimizer_switch: 'in_to_exists' for complete
manual control of the subquery execution strategies.
This commit is contained in:
unknown
2010-09-30 18:32:44 +03:00
parent 50888477af
commit 8ec5e13f1f
12 changed files with 600 additions and 454 deletions

View File

@@ -840,7 +840,12 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22)
1 0 0 1 0 0
2 0 0 2 0 0
11 0 0 11 0 0
# 2nd and 3rd columns should be same for x == 11 only # 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 3
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1; SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12) x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12)
1 0 0 1 0 0

View File

@@ -844,11 +844,16 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22)
1 0 0 1 0 0
2 0 0 2 0 0
11 0 0 11 0 0
# 2nd and 3rd columns should be same for x == 11 only # 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 3
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1; SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12) x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12)
1 0 1 1 0 0
2 0 1 2 0 0
11 1 1 11 1 1
DROP TABLE t1; DROP TABLE t1;
# both columns should be same # both columns should be same

View File

@@ -681,7 +681,8 @@ SELECT a, ROW(11, 12) = (SELECT a, 12), ROW(11, 12) IN (SELECT a, 12) FROM t1;
# The x alias is used below to workaround bug #40674. # The x alias is used below to workaround bug #40674.
# Regression tests for sum function on outer column in subselect from dual: # Regression tests for sum function on outer column in subselect from dual:
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 22), ROW(11, 12) IN (SELECT MAX(x), 22) FROM t1; SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 22), ROW(11, 12) IN (SELECT MAX(x), 22) FROM t1;
--echo # 2nd and 3rd columns should be same for x == 11 only --echo # 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1; SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
DROP TABLE t1; DROP TABLE t1;

View File

@@ -62,7 +62,6 @@ void Item_subselect::init(st_select_lex *select_lex,
DBUG_PRINT("enter", ("select_lex: 0x%lx", (long) select_lex)); DBUG_PRINT("enter", ("select_lex: 0x%lx", (long) select_lex));
unit= select_lex->master_unit(); unit= select_lex->master_unit();
thd= unit->thd; thd= unit->thd;
is_min_max_optimized= FALSE;
if (unit->item) if (unit->item)
{ {
/* /*
@@ -160,8 +159,8 @@ void Item_in_subselect::cleanup()
} }
first_execution= TRUE; first_execution= TRUE;
is_constant= FALSE; is_constant= FALSE;
if (exec_method == MATERIALIZATION) if (in_strategy & SUBS_MATERIALIZATION)
exec_method= NOT_TRANSFORMED; in_strategy= 0;
pushed_cond_guards= NULL; pushed_cond_guards= NULL;
Item_subselect::cleanup(); Item_subselect::cleanup();
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
@@ -618,7 +617,7 @@ bool Item_in_subselect::exec()
- on a cost-based basis, that takes into account the cost of a cache - on a cost-based basis, that takes into account the cost of a cache
lookup, the cache hit rate, and the savings per cache hit. lookup, the cache hit rate, and the savings per cache hit.
*/ */
if (!left_expr_cache && exec_method == MATERIALIZATION) if (!left_expr_cache && (in_strategy & SUBS_MATERIALIZATION))
init_left_expr_cache(); init_left_expr_cache();
/* /*
@@ -1072,7 +1071,7 @@ Item_in_subselect::Item_in_subselect(Item * left_exp,
st_select_lex *select_lex): st_select_lex *select_lex):
Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE), Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
is_constant(FALSE), optimizer(0), pushed_cond_guards(NULL), is_constant(FALSE), optimizer(0), pushed_cond_guards(NULL),
exec_method(NOT_TRANSFORMED), upper_item(0) in_strategy(0), upper_item(0)
{ {
DBUG_ENTER("Item_in_subselect::Item_in_subselect"); DBUG_ENTER("Item_in_subselect::Item_in_subselect");
left_expr= left_exp; left_expr= left_exp;
@@ -1488,7 +1487,6 @@ Item_in_subselect::single_value_transformer(JOIN *join)
} }
/* fix fields is already called for left expression */ /* fix fields is already called for left expression */
substitution= func->create(left_expr, subs); substitution= func->create(left_expr, subs);
is_min_max_optimized= TRUE;
DBUG_RETURN(RES_OK); DBUG_RETURN(RES_OK);
} }
@@ -1555,8 +1553,21 @@ Item_in_subselect::single_value_transformer(JOIN *join)
} }
bool Item_in_subselect::fix_having(Item *having, SELECT_LEX *select_lex)
{
bool fix_res= 0;
if (!having->fixed)
{
select_lex->having_fix_field= 1;
fix_res= having->fix_fields(thd, 0);
select_lex->having_fix_field= 0;
}
return fix_res;
}
/** /**
Transofrm an IN predicate into EXISTS via predicate injection. Transform an IN predicate into EXISTS via predicate injection.
@details The transformation injects additional predicates into the subquery @details The transformation injects additional predicates into the subquery
(and makes the subquery correlated) as follows. (and makes the subquery correlated) as follows.
@@ -1607,6 +1618,9 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
DBUG_ENTER("Item_in_subselect::create_single_in_to_exists_cond"); DBUG_ENTER("Item_in_subselect::create_single_in_to_exists_cond");
*where_item= NULL;
*having_item= NULL;
if (join_having || select_lex->with_sum_func || if (join_having || select_lex->with_sum_func ||
select_lex->group_list.elements) select_lex->group_list.elements)
{ {
@@ -1626,11 +1640,11 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
item= new Item_func_trig_cond(item, get_cond_guard(0)); item= new Item_func_trig_cond(item, get_cond_guard(0));
} }
if (item->fix_fields(thd, 0)) if (!join_having)
item->name= (char*) in_having_cond;
if (fix_having(item, select_lex))
DBUG_RETURN(RES_ERROR); DBUG_RETURN(RES_ERROR);
*having_item= item; *having_item= item;
*where_item= NULL;
} }
else else
{ {
@@ -1651,10 +1665,9 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
get_cond_guard(0)))) get_cond_guard(0))))
DBUG_RETURN(RES_ERROR); DBUG_RETURN(RES_ERROR);
} }
having->name= (char*) in_having_cond;
if (having->fix_fields(thd, 0)) if (fix_having(having, select_lex))
DBUG_RETURN(RES_ERROR); DBUG_RETURN(RES_ERROR);
*having_item= having; *having_item= having;
item= new Item_cond_or(item, item= new Item_cond_or(item,
@@ -1670,20 +1683,20 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
DBUG_RETURN(RES_ERROR); DBUG_RETURN(RES_ERROR);
} }
if (item->fix_fields(thd, 0)) /*
TODO: figure out why the following is done here in
single_value_transformer but there is no corresponding action in
row_value_transformer?
*/
item->name= (char *) in_additional_cond;
if (!item->fixed && item->fix_fields(thd, 0))
DBUG_RETURN(RES_ERROR); DBUG_RETURN(RES_ERROR);
*where_item= item; *where_item= item;
} }
else else
{ {
if (select_lex->master_unit()->is_union()) if (select_lex->master_unit()->is_union())
{ {
/*
comparison functions can't be changed during fix_fields()
we can assign select_lex->having here, and pass 0 as last
argument (reference) to fix_fields()
*/
Item *new_having= Item *new_having=
func->create(expr, func->create(expr,
new Item_ref_null_helper(&select_lex->context, this, new Item_ref_null_helper(&select_lex->context, this,
@@ -1697,132 +1710,10 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
DBUG_RETURN(RES_ERROR); DBUG_RETURN(RES_ERROR);
} }
if (new_having->fix_fields(thd, 0)) new_having->name= (char*) in_having_cond;
if (fix_having(new_having, select_lex))
DBUG_RETURN(RES_ERROR); DBUG_RETURN(RES_ERROR);
*having_item= new_having; *having_item= new_having;
*where_item= NULL;
}
else
{
DBUG_ASSERT(FALSE);
/* TIMOUR TODO */
*having_item= NULL;
*where_item= (Item*) select_lex->item_list.head();
}
}
}
DBUG_RETURN(RES_OK);
}
Item_subselect::trans_res
Item_in_subselect::inject_single_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item)
{
SELECT_LEX *select_lex= join->select_lex;
/*
The non-transformed HAVING clause of 'join' may be stored differently in
JOIN::optimize:
this->tmp_having= this->having
this->having= 0;
*/
Item* join_having= join->having ? join->having : join->tmp_having;
bool fix_res= 0;
DBUG_ENTER("Item_in_subselect::inject_single_in_to_exists_cond");
if (join_having || select_lex->with_sum_func ||
select_lex->group_list.elements)
{
/*
AND and comparison functions can't be changed during fix_fields()
we can assign select_lex->having here, and pass 0 as last
argument (reference) to fix_fields()
*/
thd->change_item_tree(&select_lex->having, and_items(join_having, having_item));
join->having= select_lex->having;
if (join->having == having_item)
having_item->name= (char*)in_having_cond;
select_lex->having_fix_field= 1;
/*
we do not check join->having->fixed, because Item_and (from and_items)
or comparison function (from func->create) can't be fixed after creation
*/
if (!join->having->fixed)
fix_res= join->having->fix_fields(thd, 0);
select_lex->having_fix_field= 0;
if (fix_res)
DBUG_RETURN(RES_ERROR);
}
else
{
if (select_lex->table_list.elements)
{
if (!abort_on_null && select_lex->item_list.head()->maybe_null)
{
/*
Item_is_not_null_test can't be changed during fix_fields()
we can assign select_lex->having here, and pass 0 as last
argument (reference) to fix_fields()
*/
having_item->name= (char*)in_having_cond;
thd->change_item_tree(&select_lex->having, having_item);
join->having= select_lex->having;
select_lex->having_fix_field= 1;
/*
we do not check join->having->fixed, because Item_and (from
and_items) or comparison function (from func->create) can't be
fixed after creation
*/
if (!join->having->fixed)
fix_res= join->having->fix_fields(thd, 0);
select_lex->having_fix_field= 0;
if (fix_res)
DBUG_RETURN(RES_ERROR);
}
/*
TODO: figure out why the following is done here in
single_value_transformer but there is no corresponding action in
row_value_transformer?
*/
where_item->name= (char *)in_additional_cond;
/*
AND can't be changed during fix_fields()
we can assign select_lex->having here, and pass 0 as last
argument (reference) to fix_fields()
*/
thd->change_item_tree(&select_lex->where, and_items(join->conds, where_item));
join->conds= select_lex->where;
select_lex->where->top_level_item();
/*
we do not check join->conds->fixed, because Item_and can't be fixed
after creation
*/
if (!join->conds->fixed && join->conds->fix_fields(thd, 0))
DBUG_RETURN(RES_ERROR);
}
else
{
if (select_lex->master_unit()->is_union())
{
having_item->name= (char*)in_having_cond;
thd->change_item_tree(&select_lex->having, having_item);
join->having= select_lex->having;
select_lex->having_fix_field= 1;
/*
we do not check join->having->fixed, because comparison function
(from func->create) can't be fixed after creation
*/
if (!join->having->fixed)
fix_res= join->having->fix_fields(thd, 0);
select_lex->having_fix_field= 0;
if (fix_res)
DBUG_RETURN(RES_ERROR);
} }
else else
DBUG_ASSERT(FALSE); DBUG_ASSERT(FALSE);
@@ -1992,7 +1883,6 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
item_having_part2->top_level_item(); item_having_part2->top_level_item();
} }
*having_item= and_items(*having_item, item_having_part2); *having_item= and_items(*having_item, item_having_part2);
(*having_item)->top_level_item();
} }
else else
{ {
@@ -2074,66 +1964,22 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
} }
*where_item= and_items(*where_item, item); *where_item= and_items(*where_item, item);
} }
(*where_item)->fix_fields(thd, 0);
} }
DBUG_RETURN(RES_OK); if (*where_item)
}
Item_subselect::trans_res
Item_in_subselect::inject_row_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item)
{
SELECT_LEX *select_lex= join->select_lex;
/*
The non-transformed HAVING clause of 'join' may be stored differently in
JOIN::optimize:
this->tmp_having= this->having
this->having= 0;
*/
Item* join_having= join->having ? join->having : join->tmp_having;
bool is_having_used= (join_having || select_lex->with_sum_func ||
select_lex->group_list.first ||
!select_lex->table_list.elements);
DBUG_ENTER("Item_in_subselect::inject_row_in_to_exists_cond");
if (!is_having_used)
{ {
/* if (!(*where_item)->fixed && (*where_item)->fix_fields(thd, 0))
AND can't be changed during fix_fields()
we can assign select_lex->where here, and pass 0 as last
argument (reference) to fix_fields()
*/
thd->change_item_tree(&select_lex->where, and_items(join->conds, where_item));
join->conds= select_lex->where;
select_lex->where->top_level_item();
if (!join->conds->fixed && join->conds->fix_fields(thd, 0))
DBUG_RETURN(RES_ERROR); DBUG_RETURN(RES_ERROR);
(*where_item)->top_level_item();
} }
if (having_item) if (*having_item)
{ {
bool res; if (!join_having)
thd->change_item_tree(&select_lex->having, (*having_item)->name= (char*) in_having_cond;
and_items(join_having, having_item)); if (fix_having(*having_item, select_lex))
join->having= select_lex->having;
if (having_item == select_lex->having)
having_item->name= (char*)in_having_cond;
select_lex->having->top_level_item();
/*
AND can't be changed during fix_fields()
we can assign select_lex->having here, and pass 0 as last
argument (reference) to fix_fields()
*/
select_lex->having_fix_field= 1;
if (!join->having->fixed)
res= join->having->fix_fields(thd, 0);
select_lex->having_fix_field= 0;
if (res)
DBUG_RETURN(RES_ERROR); DBUG_RETURN(RES_ERROR);
(*having_item)->top_level_item();
} }
DBUG_RETURN(RES_OK); DBUG_RETURN(RES_OK);
@@ -2175,20 +2021,36 @@ bool Item_in_subselect::create_in_to_exists_cond(JOIN *join_arg)
} }
bool Item_in_subselect::inject_in_to_exists_cond(JOIN * join_arg) bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg)
{ {
Item_subselect::trans_res res; SELECT_LEX *select_lex= join_arg->select_lex;
Item *where_item= join_arg->in_to_exists_where;
Item *having_item= join_arg->in_to_exists_having;
if (left_expr->cols() == 1) DBUG_ENTER("Item_in_subselect::inject_in_to_exists_cond");
res= inject_single_in_to_exists_cond(join_arg,
join_arg->in_to_exists_where,
join_arg->in_to_exists_having);
else
res= inject_row_in_to_exists_cond(join_arg,
join_arg->in_to_exists_where,
join_arg->in_to_exists_having);
return (res != RES_OK && res != RES_REDUCE); if (where_item)
{
where_item= and_items(join_arg->conds, where_item);
if (!where_item->fixed && where_item->fix_fields(thd, 0))
DBUG_RETURN(true);
thd->change_item_tree(&select_lex->where, where_item);
select_lex->where->top_level_item();
join_arg->conds= select_lex->where;
}
if (having_item)
{
Item* join_having= join_arg->having ? join_arg->having:join_arg->tmp_having;
having_item= and_items(join_having, having_item);
if (fix_having(having_item, select_lex))
DBUG_RETURN(true);
thd->change_item_tree(&select_lex->having, having_item);
select_lex->having->top_level_item();
join_arg->having= select_lex->having;
}
DBUG_RETURN(false);
} }
@@ -2301,7 +2163,7 @@ err:
void Item_in_subselect::print(String *str, enum_query_type query_type) void Item_in_subselect::print(String *str, enum_query_type query_type)
{ {
if (exec_method == IN_TO_EXISTS) if (in_strategy & SUBS_IN_TO_EXISTS)
str->append(STRING_WITH_LEN("<exists>")); str->append(STRING_WITH_LEN("<exists>"));
else else
{ {
@@ -2317,7 +2179,7 @@ bool Item_in_subselect::fix_fields(THD *thd_arg, Item **ref)
uint outer_cols_num; uint outer_cols_num;
List<Item> *inner_cols; List<Item> *inner_cols;
if (exec_method == SEMI_JOIN) if (in_strategy & SUBS_SEMI_JOIN)
return !( (*ref)= new Item_int(1)); return !( (*ref)= new Item_int(1));
/* /*
@@ -2498,10 +2360,7 @@ bool Item_in_subselect::init_cond_guards()
Callback to test if an IN predicate is expensive. Callback to test if an IN predicate is expensive.
@details @details
IN predicates are considered expensive only if they will be executed via The return value affects the behavior of make_cond_for_table().
materialization. The return value affects the behavior of
make_cond_for_table() in such a way that it is unchanged when we use
the IN=>EXISTS transformation to compute IN.
@retval TRUE if the predicate is expensive @retval TRUE if the predicate is expensive
@retval FALSE otherwise @retval FALSE otherwise
@@ -2509,7 +2368,8 @@ bool Item_in_subselect::init_cond_guards()
bool Item_in_subselect::is_expensive_processor(uchar *arg) bool Item_in_subselect::is_expensive_processor(uchar *arg)
{ {
return TRUE; // exec_method == MATERIALIZATION; /* TIMOUR: TODO: decide on a cost basis whether it is expensive or not. */
return TRUE;
} }
@@ -2517,7 +2377,7 @@ Item_subselect::trans_res
Item_allany_subselect::select_transformer(JOIN *join) Item_allany_subselect::select_transformer(JOIN *join)
{ {
DBUG_ENTER("Item_allany_subselect::select_transformer"); DBUG_ENTER("Item_allany_subselect::select_transformer");
exec_method= IN_TO_EXISTS; in_strategy= SUBS_IN_TO_EXISTS;
if (upper_item) if (upper_item)
upper_item->show= 1; upper_item->show= 1;
DBUG_RETURN(select_in_like_transformer(join)); DBUG_RETURN(select_in_like_transformer(join));
@@ -2526,7 +2386,7 @@ Item_allany_subselect::select_transformer(JOIN *join)
void Item_allany_subselect::print(String *str, enum_query_type query_type) void Item_allany_subselect::print(String *str, enum_query_type query_type)
{ {
if (exec_method == IN_TO_EXISTS) if (in_strategy & SUBS_IN_TO_EXISTS)
str->append(STRING_WITH_LEN("<exists>")); str->append(STRING_WITH_LEN("<exists>"));
else else
{ {

View File

@@ -108,9 +108,6 @@ public:
/* subquery is transformed */ /* subquery is transformed */
bool changed; bool changed;
/* TIMOUR: this is temporary, remove it. */
bool is_min_max_optimized;
/* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */ /* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */
bool is_correlated; bool is_correlated;
@@ -121,6 +118,12 @@ public:
Item_subselect(); Item_subselect();
virtual subs_type substype() { return UNKNOWN_SUBS; } virtual subs_type substype() { return UNKNOWN_SUBS; }
bool is_in_predicate()
{
return (substype() == Item_subselect::IN_SUBS ||
substype() == Item_subselect::ALL_SUBS ||
substype() == Item_subselect::ANY_SUBS);
}
/* /*
We need this method, because some compilers do not allow 'this' We need this method, because some compilers do not allow 'this'
@@ -314,6 +317,18 @@ public:
}; };
/*
Possible methods to execute an IN predicate. These are set by the optimizer
based on user-set optimizer switches, syntactic analysis and cost comparison.
*/
#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */
#define SUBS_SEMI_JOIN 1 /* IN was converted to semi-join. */
#define SUBS_IN_TO_EXISTS 2 /* IN was converted to correlated EXISTS. */
#define SUBS_MATERIALIZATION 4 /* Execute IN via subquery materialization. */
/* Partial matching substrategies of MATERIALIZATION. */
#define SUBS_PARTIAL_MATCH_ROWID_MERGE 8
#define SUBS_PARTIAL_MATCH_TABLE_SCAN 16
/** /**
Representation of IN subquery predicates of the form Representation of IN subquery predicates of the form
"left_expr IN (SELECT ...)". "left_expr IN (SELECT ...)".
@@ -362,19 +377,13 @@ protected:
trans_res select_in_like_transformer(JOIN *join); trans_res select_in_like_transformer(JOIN *join);
trans_res single_value_transformer(JOIN *join); trans_res single_value_transformer(JOIN *join);
trans_res row_value_transformer(JOIN * join); trans_res row_value_transformer(JOIN * join);
bool fix_having(Item *having, st_select_lex *select_lex);
trans_res create_single_in_to_exists_cond(JOIN * join, trans_res create_single_in_to_exists_cond(JOIN * join,
Item **where_item, Item **where_item,
Item **having_item); Item **having_item);
trans_res inject_single_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item);
trans_res create_row_in_to_exists_cond(JOIN * join, trans_res create_row_in_to_exists_cond(JOIN * join,
Item **where_item, Item **where_item,
Item **having_item); Item **having_item);
trans_res inject_row_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item);
public: public:
Item *left_expr; Item *left_expr;
/* Priority of this predicate in the convert-to-semi-join-nest process. */ /* Priority of this predicate in the convert-to-semi-join-nest process. */
@@ -407,14 +416,8 @@ public:
*/ */
bool sjm_scan_allowed; bool sjm_scan_allowed;
/* The method chosen to execute the IN predicate. */ /* A bitmap of possible execution strategies for an IN predicate. */
enum enum_exec_method { uchar in_strategy;
NOT_TRANSFORMED, /* No execution method was chosen for this IN. */
SEMI_JOIN, /* IN was converted to semi-join nest and should be removed. */
IN_TO_EXISTS, /* IN was converted to correlated EXISTS. */
MATERIALIZATION /* IN will be executed via subquery materialization. */
};
enum_exec_method exec_method;
bool *get_cond_guard(int i) bool *get_cond_guard(int i)
{ {
@@ -433,7 +436,7 @@ public:
Item_in_subselect() Item_in_subselect()
:Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE), :Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
is_constant(FALSE), optimizer(0), abort_on_null(0), is_constant(FALSE), optimizer(0), abort_on_null(0),
pushed_cond_guards(NULL), func(NULL), exec_method(NOT_TRANSFORMED), pushed_cond_guards(NULL), func(NULL), in_strategy(0),
upper_item(0) upper_item(0)
{} {}
void cleanup(); void cleanup();
@@ -446,8 +449,8 @@ public:
was_null= 0; was_null= 0;
} }
trans_res select_transformer(JOIN *join); trans_res select_transformer(JOIN *join);
bool create_in_to_exists_cond(JOIN * join_arg); bool create_in_to_exists_cond(JOIN *join_arg);
bool inject_in_to_exists_cond(JOIN * join_arg); bool inject_in_to_exists_cond(JOIN *join_arg);
virtual bool exec(); virtual bool exec();
longlong val_int(); longlong val_int();

View File

@@ -563,20 +563,20 @@ protected:
#define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4 #define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4
#define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8 #define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8
#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 16 #define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 16
#define OPTIMIZER_SWITCH_FIRSTMATCH 32 #define OPTIMIZER_SWITCH_FIRSTMATCH 32
#define OPTIMIZER_SWITCH_LOOSE_SCAN 64 #define OPTIMIZER_SWITCH_LOOSE_SCAN 64
#define OPTIMIZER_SWITCH_MATERIALIZATION 128 #define OPTIMIZER_SWITCH_MATERIALIZATION 128
#define OPTIMIZER_SWITCH_SEMIJOIN 256 #define OPTIMIZER_SWITCH_IN_TO_EXISTS 256
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512 #define OPTIMIZER_SWITCH_SEMIJOIN 512
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024 #define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 1024
#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<11) #define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN (1<<11)
#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<12)
#ifdef DBUG_OFF #ifdef DBUG_OFF
# define OPTIMIZER_SWITCH_LAST (1<<12)
#else
# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<12)
# define OPTIMIZER_SWITCH_LAST (1<<13) # define OPTIMIZER_SWITCH_LAST (1<<13)
#else
# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<13)
# define OPTIMIZER_SWITCH_LAST (1<<14)
#endif #endif
#ifdef DBUG_OFF #ifdef DBUG_OFF
@@ -589,6 +589,7 @@ protected:
OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \ OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \ OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\ OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\ OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
@@ -603,6 +604,7 @@ protected:
OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \ OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \ OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\ OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\ OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\

View File

@@ -304,7 +304,7 @@ static const char *optimizer_switch_names[]=
"index_merge","index_merge_union","index_merge_sort_union", "index_merge","index_merge_union","index_merge_sort_union",
"index_merge_intersection", "index_merge_intersection",
"index_condition_pushdown", "index_condition_pushdown",
"firstmatch","loosescan","materialization", "semijoin", "firstmatch","loosescan","materialization","in_to_exists","semijoin",
"partial_match_rowid_merge", "partial_match_rowid_merge",
"partial_match_table_scan", "partial_match_table_scan",
"subquery_cache", "subquery_cache",
@@ -325,6 +325,7 @@ static const unsigned int optimizer_switch_names_len[]=
sizeof("firstmatch") - 1, sizeof("firstmatch") - 1,
sizeof("loosescan") - 1, sizeof("loosescan") - 1,
sizeof("materialization") - 1, sizeof("materialization") - 1,
sizeof("in_to_exists") - 1,
sizeof("semijoin") - 1, sizeof("semijoin") - 1,
sizeof("partial_match_rowid_merge") - 1, sizeof("partial_match_rowid_merge") - 1,
sizeof("partial_match_table_scan") - 1, sizeof("partial_match_table_scan") - 1,
@@ -412,6 +413,7 @@ static const char *optimizer_switch_str="index_merge=on,index_merge_union=on,"
"firstmatch=on," "firstmatch=on,"
"loosescan=on," "loosescan=on,"
"materialization=on," "materialization=on,"
"in_to_exists=on,"
"semijoin=on," "semijoin=on,"
"partial_match_rowid_merge=on," "partial_match_rowid_merge=on,"
"partial_match_table_scan=on," "partial_match_table_scan=on,"
@@ -7233,7 +7235,7 @@ The minimum value for this variable is 4096.",
{"optimizer_switch", OPT_OPTIMIZER_SWITCH, {"optimizer_switch", OPT_OPTIMIZER_SWITCH,
"optimizer_switch=option=val[,option=val...], where option={index_merge, " "optimizer_switch=option=val[,option=val...], where option={index_merge, "
"index_merge_union, index_merge_sort_union, index_merge_intersection, " "index_merge_union, index_merge_sort_union, index_merge_intersection, "
"index_condition_pushdown, firstmatch, loosescan, materialization, " "index_condition_pushdown, firstmatch, loosescan, materialization, in_to_exists, "
"semijoin, partial_match_rowid_merge, partial_match_table_scan, " "semijoin, partial_match_rowid_merge, partial_match_table_scan, "
"subquery_cache" "subquery_cache"
#ifndef DBUG_OFF #ifndef DBUG_OFF

View File

@@ -165,7 +165,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
thd->thd_marker.emb_on_expr_nest && // 5 thd->thd_marker.emb_on_expr_nest && // 5
select_lex->outer_select()->join && // 6 select_lex->outer_select()->join && // 6
parent_unit->first_select()->leaf_tables && // 7 parent_unit->first_select()->leaf_tables && // 7
in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED && // 8 !in_subs->in_strategy && // 8
select_lex->outer_select()->leaf_tables && // 9 select_lex->outer_select()->leaf_tables && // 9
!((join->select_options | // 10 !((join->select_options | // 10
select_lex->outer_select()->join->select_options) // 10 select_lex->outer_select()->join->select_options) // 10
@@ -185,23 +185,34 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
else else
{ {
DBUG_PRINT("info", ("Subquery can't be converted to semi-join")); DBUG_PRINT("info", ("Subquery can't be converted to semi-join"));
if (!optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) &&
!optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0));
if (in_subs)
{
/* Subquery predicate is an IN/=ANY predicate. */
if (optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS))
in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
in_subs->in_strategy|= SUBS_MATERIALIZATION;
/* /*
Check if the subquery predicate can be executed via materialization. Check if the subquery predicate can be executed via materialization.
The required conditions are: The required conditions are:
1. Subquery predicate is an IN/=ANY subq predicate 1. Subquery is a single SELECT (not a UNION)
2. Subquery is a single SELECT (not a UNION) 2. Subquery is not a table-less query. In this case there is no
3. Subquery is not a table-less query. In this case there is no
point in materializing. point in materializing.
3A The upper query is not a table-less SELECT ... FROM DUAL. We 2A The upper query is not a table-less SELECT ... FROM DUAL. We
can't do materialization for SELECT .. FROM DUAL because it can't do materialization for SELECT .. FROM DUAL because it
does not call setup_subquery_materialization(). We could make does not call setup_subquery_materialization(). We could make
SELECT ... FROM DUAL call that function but that doesn't seem SELECT ... FROM DUAL call that function but that doesn't seem
to be the case that is worth handling. to be the case that is worth handling.
4. Either the subquery predicate is a top-level predicate, or at 3. Either the subquery predicate is a top-level predicate, or at
least one partial match strategy is enabled. If no partial match least one partial match strategy is enabled. If no partial match
strategy is enabled, then materialization cannot be used for strategy is enabled, then materialization cannot be used for
non-top-level queries because it cannot handle NULLs correctly. non-top-level queries because it cannot handle NULLs correctly.
5. Subquery is non-correlated 4. Subquery is non-correlated
TODO: TODO:
This is an overly restrictive condition. It can be extended to: This is an overly restrictive condition. It can be extended to:
(Subquery is non-correlated || (Subquery is non-correlated ||
@@ -209,41 +220,36 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
(Subquery is correlated to the immediate outer query && (Subquery is correlated to the immediate outer query &&
Subquery !contains {GROUP BY, ORDER BY [LIMIT], Subquery !contains {GROUP BY, ORDER BY [LIMIT],
aggregate functions}) && subquery predicate is not under "NOT IN")) aggregate functions}) && subquery predicate is not under "NOT IN"))
6. No execution method was already chosen (by a prepared statement).
(*) The subquery must be part of a SELECT statement. The current (*) The subquery must be part of a SELECT statement. The current
condition also excludes multi-table update statements. condition also excludes multi-table update statements.
Determine whether we will perform subquery materialization before
calling the IN=>EXISTS transformation, so that we know whether to
perform the whole transformation or only that part of it which wraps
Item_in_subselect in an Item_in_optimizer.
*/ */
if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION) && if (!(in_subs->in_strategy & SUBS_MATERIALIZATION &&
in_subs && // 1 !select_lex->is_part_of_union() && // 1
!select_lex->is_part_of_union() && // 2 parent_unit->first_select()->leaf_tables && // 2
parent_unit->first_select()->leaf_tables && // 3
thd->lex->sql_command == SQLCOM_SELECT && // * thd->lex->sql_command == SQLCOM_SELECT && // *
select_lex->outer_select()->leaf_tables && // 3A select_lex->outer_select()->leaf_tables && // 2A
subquery_types_allow_materialization(in_subs) && subquery_types_allow_materialization(in_subs) &&
// psergey-todo: duplicated_subselect_card_check: where it's done? // psergey-todo: duplicated_subselect_card_check: where it's done?
(in_subs->is_top_level_item() || (in_subs->is_top_level_item() || //3
optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || optimizer_flag(thd,
optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) &&//4 OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || //3
!in_subs->is_correlated && // 5 optimizer_flag(thd,
in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6 OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) && //3
!in_subs->is_correlated)) //4
{
/* Materialization is not possible based on syntactic properties. */
in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
}
if (!in_subs->in_strategy)
{ {
/* /*
Materialization is possible, later the optimize phase of each If neither materialization is possible, nor the user chose
subquery will choose either materialization or in-to-exists based IN-TO-EXISTS, choose IN-TO-EXISTS as the only universal strategy.
on cost.
*/ */
in_subs->exec_method= Item_in_subselect::MATERIALIZATION; in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
} }
else if (in_subs)
{
/* Materialization is not possible at all. */
in_subs->exec_method= Item_in_subselect::IN_TO_EXISTS;
} }
/* /*
@@ -533,13 +539,13 @@ skip_conversion:
} }
/* /*
Revert to the IN->EXISTS strategy in the rare case when the subquery could Revert to the IN->EXISTS strategy in the rare case when the subquery could
be flattened. not be flattened.
TODO: This is a limitation done for simplicity. Such subqueries could also TODO: This is a limitation done for simplicity. Such subqueries could also
be executed via materialization. In order to determine this, we should be executed via materialization. In order to determine this, we should
re-run the test for materialization that was done in re-run the test for materialization that was done in
check_and_do_in_subquery_rewrites. check_and_do_in_subquery_rewrites.
*/ */
(*in_subq)->exec_method= Item_in_subselect::IN_TO_EXISTS; (*in_subq)->in_strategy= SUBS_IN_TO_EXISTS;
} }
if (arena) if (arena)
@@ -800,8 +806,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
/* 3. Remove the original subquery predicate from the WHERE/ON */ /* 3. Remove the original subquery predicate from the WHERE/ON */
// The subqueries were replaced for Item_int(1) earlier // The subqueries were replaced for Item_int(1) earlier
subq_pred->exec_method= subq_pred->in_strategy= SUBS_SEMI_JOIN; // for subsequent executions
Item_in_subselect::SEMI_JOIN; // for subsequent executions
/*TODO: also reset the 'with_subselect' there. */ /*TODO: also reset the 'with_subselect' there. */
/* n. Adjust the parent_join->tables counter */ /* n. Adjust the parent_join->tables counter */
@@ -3393,6 +3398,7 @@ int rewrite_to_index_subquery_engine(JOIN *join)
JOIN_TAB* join_tab=join->join_tab; JOIN_TAB* join_tab=join->join_tab;
SELECT_LEX_UNIT *unit= join->unit; SELECT_LEX_UNIT *unit= join->unit;
DBUG_ENTER("rewrite_to_index_subquery_engine"); DBUG_ENTER("rewrite_to_index_subquery_engine");
/* /*
is this simple IN subquery? is this simple IN subquery?
*/ */
@@ -3564,56 +3570,157 @@ bool JOIN::optimize_unflattened_subqueries()
} }
bool JOIN::choose_subquery_plan() bool JOIN::choose_subquery_plan(table_map join_tables)
{ {
double mat_strategy_cost; /* The cost to compute IN via materialization. */ /* The original QEP of the subquery. */
double in_exists_strategy_cost; /* The cost of the IN->EXISTS strategy. */ DYNAMIC_ARRAY save_keyuse; /* Copy of the JOIN::keyuse array. */
bool res; POSITION save_best_positions[MAX_TABLES+1]; /* Copy of JOIN::best_positions */
/* Copies of the JOIN_TAB::keyuse pointers for each JOIN_TAB. */
KEYUSE *save_join_tab_keyuse[MAX_TABLES];
/* Copies of JOIN_TAB::checked_keys for each JOIN_TAB. */
key_map save_join_tab_checked_keys[MAX_TABLES];
DBUG_ASSERT(in_to_exists_where || in_to_exists_having); bool in_exists_reoptimized= false;
DBUG_ASSERT(select_lex->master_unit()->item && Item_in_subselect *in_subs;
(select_lex->master_unit()->item->substype() ==
Item_subselect::IN_SUBS ||
select_lex->master_unit()->item->substype() ==
Item_subselect::ALL_SUBS ||
select_lex->master_unit()->item->substype() ==
Item_subselect::ANY_SUBS));
Item_in_subselect *in_subs= (Item_in_subselect*) if (select_lex->master_unit()->item &&
select_lex->master_unit()->item; select_lex->master_unit()->item->is_in_predicate())
/* Always revert to IN->EXISTS. */
mat_strategy_cost= 0;
in_exists_strategy_cost= 1;
/*
If (1) materialization is a possible strategy based on static analysis, and
(2) it is cheaper strategy than the IN->EXISTS transformation, then compute
in_subs via the materialization trategy.
*/
if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION && // 1
mat_strategy_cost < in_exists_strategy_cost) // 2
{ {
if (in_subs->setup_mat_engine()) in_subs= (Item_in_subselect*) select_lex->master_unit()->item;
{ if (in_subs->create_in_to_exists_cond(this))
/* return true;
In some cases it is not possible to create usable indexes for the
materialization strategy, so fall back to IN->EXISTS.
*/
in_subs->exec_method= Item_in_subselect::IN_TO_EXISTS;
} }
else
return false;
DBUG_ASSERT(in_subs->in_strategy); /* A strategy must be chosen earlier. */
DBUG_ASSERT(in_to_exists_where || in_to_exists_having);
DBUG_ASSERT(!in_to_exists_where || in_to_exists_where->fixed);
DBUG_ASSERT(!in_to_exists_having || in_to_exists_having->fixed);
save_keyuse.elements= 0;
save_keyuse.buffer= NULL;
/*
Compute and compare the costs of materialization and in-exists if both
strategies are possible and allowed by the user (checked during the prepare
phase.
*/
if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
in_subs->in_strategy & SUBS_IN_TO_EXISTS)
{
JOIN *outer_join= unit->outer_select() ? unit->outer_select()->join : NULL;
JOIN *inner_join= this;
/* Cost of the outer JOIN. */
double outer_read_time= 0, outer_record_count= 0;
/* Cost of the unmodified subquery. */
double inner_read_time_1= 0, inner_record_count_1= 0;
/* Cost of the subquery with injected IN-EXISTS predicates. */
double inner_read_time_2= 0, inner_record_count_2= 0;
/* The cost to compute IN via materialization. */
double materialize_strategy_cost= 0;
/* The cost of the IN->EXISTS strategy. */
double in_exists_strategy_cost= 1;
if (outer_join)
get_partial_join_cost(outer_join, outer_join->tables,
&outer_read_time, &outer_record_count);
else
{
/* TODO: outer_join can be NULL for DELETE statements. */
outer_read_time= 1; /* TODO */
outer_record_count= 1; /* TODO */
}
get_partial_join_cost(inner_join, inner_join->tables,
&inner_read_time_1, &inner_record_count_1);
if (in_to_exists_where && const_tables != tables)
{
/*
Re-optimize and cost the subquery taking into account the IN-EXISTS
conditions.
*/
if (save_query_plan(&save_keyuse, save_best_positions,
save_join_tab_keyuse, save_join_tab_checked_keys) ||
reoptimize(in_to_exists_where, join_tables, save_best_positions))
return TRUE;
in_exists_reoptimized= true;
get_partial_join_cost(inner_join, inner_join->tables,
&inner_read_time_2, &inner_record_count_2);
} }
else else
{ {
/* /* Reoptimization would not produce any better plan. */
Previous optimizer phases should have chosen either a materialization inner_read_time_2= inner_read_time_1;
or IN->EXISTS strategy.
*/
DBUG_ASSERT(in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS);
} }
if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION) /* Compute execution costs. */
/*
1. Compute the cost of the materialization strategy.
*/
double materialization_cost; /* The cost of executing the subquery and */
/* storing its result in an indexed temp table.*/
/* The cost of a lookup into the unique index of the materialized table. */
double lookup_cost;
double write_row_cost= 1; /* TODO: what is the real cost to write a row? */
materialization_cost= inner_read_time_1 +
inner_record_count_1 * write_row_cost;
/*
The cost of a hash/btree lookup into a unique index of a materialized
subquery.
TIMOUR: TODO: the block of code below is exact copy/paste from
opt_subselect.cc:optimize_semi_join_nests() - refactor it.
*/
uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list);
if (rowlen * inner_record_count_1 < thd->variables.max_heap_table_size)
lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST;
else
lookup_cost= DISK_TEMPTABLE_LOOKUP_COST;
materialize_strategy_cost= materialization_cost +
outer_record_count * lookup_cost;
/*
2. Compute the cost of the IN=>EXISTS strategy.
*/
in_exists_strategy_cost= outer_record_count * inner_read_time_2;
/* Compare the costs and choose the cheaper strategy. */
if (materialize_strategy_cost >= in_exists_strategy_cost)
in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
else
in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS;
}
/*
If (1) materialization is a possible strategy based on static analysis
during the prepare phase, then if
(2) it is more expensive than the IN->EXISTS transformation, and
(3) it is not possible to create usable indexes for the materialization
strategy,
fall back to IN->EXISTS.
otherwise use materialization.
*/
if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
in_subs->setup_mat_engine())
{ {
/*
If materialization was the cheaper or the only user-selected strategy,
but it is not possible to execute it due to limitations in the
implementation, fall back to IN-TO-EXISTS.
*/
in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
}
if (in_subs->in_strategy & SUBS_MATERIALIZATION)
{
/* Restore orginal query plan used for materialization. */
if (in_exists_reoptimized)
restore_query_plan(&save_keyuse, save_best_positions,
save_join_tab_keyuse, save_join_tab_checked_keys);
/* TODO: should we set/unset this flag for both select_lex and its unit? */ /* TODO: should we set/unset this flag for both select_lex and its unit? */
in_subs->unit->uncacheable&= ~UNCACHEABLE_DEPENDENT; in_subs->unit->uncacheable&= ~UNCACHEABLE_DEPENDENT;
select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT; select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT;
@@ -3633,13 +3740,32 @@ bool JOIN::choose_subquery_plan()
set in the beginning of JOIN::optimize, which was already done. set in the beginning of JOIN::optimize, which was already done.
*/ */
select_limit= in_subs->unit->select_limit_cnt; select_limit= in_subs->unit->select_limit_cnt;
// TODO: inner_join->restore_plan (keyuse, best_positions, best_read)
} }
else if (in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS) else if (in_subs->in_strategy & SUBS_IN_TO_EXISTS)
res= in_subs->inject_in_to_exists_cond(this); {
/* Keep the new query plan with injected conditions, delete the old one. */
if (save_keyuse.elements)
{
DBUG_ASSERT(in_exists_reoptimized);
delete_dynamic(&save_keyuse);
}
if (!in_exists_reoptimized && in_to_exists_where && const_tables != tables)
{
for (uint i= 0; i < tables; i++)
{
join_tab[i].keyuse= NULL;
join_tab[i].checked_keys.clear_all();
}
if (reoptimize(in_to_exists_where, join_tables, NULL))
return TRUE;
}
if (in_subs->inject_in_to_exists_cond(this))
return TRUE;
}
else else
DBUG_ASSERT(FALSE); DBUG_ASSERT(FALSE);
return res; return FALSE;
} }

View File

@@ -6245,3 +6245,5 @@ ER_UNKNOWN_OPTION
eng "Unknown option '%-.64s'" eng "Unknown option '%-.64s'"
ER_BAD_OPTION_VALUE ER_BAD_OPTION_VALUE
eng "Incorrect value '%-.64s' for option '%-.64s'" eng "Incorrect value '%-.64s' for option '%-.64s'"
ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES
eng "At least one of the 'in_to_exists' or 'materialization' optimizer_switch flags must be 'on'."

View File

@@ -2157,8 +2157,8 @@ void st_select_lex::print_limit(THD *thd,
select_limit == 1, and there should be no offset_limit. select_limit == 1, and there should be no offset_limit.
*/ */
(((subs_type == Item_subselect::IN_SUBS) && (((subs_type == Item_subselect::IN_SUBS) &&
((Item_in_subselect*)item)->exec_method == ((Item_in_subselect*)item)->in_strategy &
Item_in_subselect::MATERIALIZATION) ? SUBS_MATERIALIZATION) ?
TRUE : TRUE :
(select_limit->val_int() == 1LL) && (select_limit->val_int() == 1LL) &&
offset_limit == 0)); offset_limit == 0));
@@ -3096,25 +3096,11 @@ bool st_select_lex::optimize_unflattened_subqueries()
Item_subselect *subquery_predicate= un->item; Item_subselect *subquery_predicate= un->item;
if (subquery_predicate) if (subquery_predicate)
{ {
Item_in_subselect *item_in= NULL;
if (subquery_predicate->substype() == Item_subselect::IN_SUBS ||
subquery_predicate->substype() == Item_subselect::ALL_SUBS ||
subquery_predicate->substype() == Item_subselect::ANY_SUBS)
item_in= (Item_in_subselect*) subquery_predicate;
for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select()) for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
{ {
JOIN *inner_join= sl->join; JOIN *inner_join= sl->join;
SELECT_LEX *save_select= un->thd->lex->current_select; SELECT_LEX *save_select= un->thd->lex->current_select;
int res; int res;
/*
Make sure that we do not create IN->EXISTS conditions for
subquery predicates that were substituted by Item_maxmin_subselect
or by Item_singlerow_subselect.
*/
DBUG_ASSERT(!item_in || (item_in && !item_in->is_min_max_optimized));
if (item_in && item_in->create_in_to_exists_cond(inner_join))
return TRUE;
/* We need only 1 row to determine existence */ /* We need only 1 row to determine existence */
un->set_limit(un->global_parameters); un->set_limit(un->global_parameters);
un->thd->lex->current_select= sl; un->thd->lex->current_select= sl;

View File

@@ -53,9 +53,9 @@ static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, COND *conds,
static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse, static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
JOIN_TAB *join_tab, JOIN_TAB *join_tab,
uint tables, COND *conds, uint tables, COND *conds,
COND_EQUAL *cond_equal,
table_map table_map, SELECT_LEX *select_lex, table_map table_map, SELECT_LEX *select_lex,
st_sargable_param **sargables); st_sargable_param **sargables);
static bool sort_and_filter_keyuse(DYNAMIC_ARRAY *keyuse);
static int sort_keyuse(KEYUSE *a,KEYUSE *b); static int sort_keyuse(KEYUSE *a,KEYUSE *b);
static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse, static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
table_map used_tables); table_map used_tables);
@@ -928,24 +928,29 @@ JOIN::optimize()
error= 0; error= 0;
if (optimize_unflattened_subqueries()) if (optimize_unflattened_subqueries())
DBUG_RETURN(1); DBUG_RETURN(1);
if (in_to_exists_where || in_to_exists_having)
{
/* /*
TIMOUR: TODO: refactor this block and JOIN::choose_subquery_plan TIMOUR: TODO: consider do we need to optimize here at all and refactor
*/ this block and JOIN::choose_subquery_plan.
Item_in_subselect *in_subs= (Item_in_subselect*)
select_lex->master_unit()->item;
if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION) if (choose_subquery_plan())
; // setup materialized execution structures DBUG_RETURN(1);
else if (in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS) */
Item_in_subselect *in_subs;
if (select_lex->master_unit()->item &&
select_lex->master_unit()->item->is_in_predicate())
{ {
in_subs= (Item_in_subselect*) select_lex->master_unit()->item;
if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
in_subs->setup_mat_engine())
in_subs->in_strategy= SUBS_IN_TO_EXISTS;
if (in_subs->in_strategy & SUBS_IN_TO_EXISTS)
{
if (in_subs->create_in_to_exists_cond(this))
DBUG_RETURN(1);
if (in_subs->inject_in_to_exists_cond(this)) if (in_subs->inject_in_to_exists_cond(this))
DBUG_RETURN(1); DBUG_RETURN(1);
tmp_having= having; tmp_having= having;
} }
else
DBUG_ASSERT(FALSE);
} }
DBUG_RETURN(0); DBUG_RETURN(0);
} }
@@ -2828,10 +2833,14 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
} }
if (conds || outer_join) if (conds || outer_join)
{
if (update_ref_and_keys(join->thd, keyuse_array, stat, join->tables, if (update_ref_and_keys(join->thd, keyuse_array, stat, join->tables,
conds, join->cond_equal, conds, ~outer_join, join->select_lex, &sargables))
~outer_join, join->select_lex, &sargables))
goto error; goto error;
if (keyuse_array->elements && sort_and_filter_keyuse(keyuse_array))
goto error;
DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array););
}
join->const_table_map= no_rows_const_tables; join->const_table_map= no_rows_const_tables;
join->const_tables= const_count; join->const_tables= const_count;
@@ -3135,8 +3144,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
sizeof(POSITION)*join->const_tables); sizeof(POSITION)*join->const_tables);
join->best_read=1.0; join->best_read=1.0;
} }
if ((join->in_to_exists_where || join->in_to_exists_having) if (join->choose_subquery_plan(all_table_map & ~join->const_table_map))
&& join->choose_subquery_plan())
goto error; goto error;
/* Generate an execution plan from the found optimal join order. */ /* Generate an execution plan from the found optimal join order. */
@@ -4089,11 +4097,10 @@ static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
static bool static bool
update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab, update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
uint tables, COND *cond, COND_EQUAL *cond_equal, uint tables, COND *cond, table_map normal_tables,
table_map normal_tables, SELECT_LEX *select_lex, SELECT_LEX *select_lex, SARGABLE_PARAM **sargables)
SARGABLE_PARAM **sargables)
{ {
uint and_level,i,found_eq_constant; uint and_level,i;
KEY_FIELD *key_fields, *end, *field; KEY_FIELD *key_fields, *end, *field;
uint sz; uint sz;
uint m= max(select_lex->max_equal_elems,1); uint m= max(select_lex->max_equal_elems,1);
@@ -4189,7 +4196,11 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
return TRUE; return TRUE;
} }
/* return FALSE;
}
/**
Sort the array of possible keys and remove the following key parts: Sort the array of possible keys and remove the following key parts:
- ref if there is a keypart which is a ref and a const. - ref if there is a keypart which is a ref and a const.
(e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d, (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
@@ -4198,21 +4209,26 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
(e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
used in the query, we drop the partial key parts from consideration). used in the query, we drop the partial key parts from consideration).
Special treatment for ft-keys. Special treatment for ft-keys.
*/ */
if (keyuse->elements)
{
KEYUSE key_end,*prev,*save_pos,*use;
my_qsort(keyuse->buffer,keyuse->elements,sizeof(KEYUSE), static bool sort_and_filter_keyuse(DYNAMIC_ARRAY *keyuse)
{
KEYUSE key_end, *prev, *save_pos, *use;
uint found_eq_constant, i;
DBUG_ASSERT(keyuse->elements);
my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE),
(qsort_cmp) sort_keyuse); (qsort_cmp) sort_keyuse);
bzero((char*) &key_end,sizeof(key_end)); /* Add for easy testing */ bzero((char*) &key_end, sizeof(key_end)); /* Add for easy testing */
if (insert_dynamic(keyuse,(uchar*) &key_end)) if (insert_dynamic(keyuse, (uchar*) &key_end))
return TRUE; return TRUE;
use=save_pos=dynamic_element(keyuse,0,KEYUSE*); use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
prev= &key_end; prev= &key_end;
found_eq_constant=0; found_eq_constant= 0;
for (i=0 ; i < keyuse->elements-1 ; i++,use++) for (i=0 ; i < keyuse->elements-1 ; i++,use++)
{ {
if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL) if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
@@ -4234,7 +4250,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
if (save_pos != use) if (save_pos != use)
#endif #endif
*save_pos= *use; *save_pos= *use;
prev=use; prev= use;
found_eq_constant= !use->used_tables; found_eq_constant= !use->used_tables;
/* Save ptr to first use */ /* Save ptr to first use */
if (!use->table->reginfo.join_tab->keyuse) if (!use->table->reginfo.join_tab->keyuse)
@@ -4242,14 +4258,14 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
use->table->reginfo.join_tab->checked_keys.set_bit(use->key); use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
save_pos++; save_pos++;
} }
i=(uint) (save_pos-(KEYUSE*) keyuse->buffer); i= (uint) (save_pos-(KEYUSE*) keyuse->buffer);
VOID(set_dynamic(keyuse,(uchar*) &key_end,i)); VOID(set_dynamic(keyuse,(uchar*) &key_end,i));
keyuse->elements=i; keyuse->elements= i;
}
DBUG_EXECUTE("opt", print_keyuse_array(keyuse););
return FALSE; return FALSE;
} }
/** /**
Update some values in keyuse for faster choose_plan() loop. Update some values in keyuse for faster choose_plan() loop.
*/ */
@@ -19265,6 +19281,131 @@ bool JOIN::change_result(select_result *res)
DBUG_RETURN(FALSE); DBUG_RETURN(FALSE);
} }
/**
Save the original query execution plan so that the caller can revert to it
if needed.
*/
int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse,
POSITION *save_best_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys)
{
if (keyuse.elements)
{
DYNAMIC_ARRAY tmp_keyuse;
if (my_init_dynamic_array(save_keyuse, sizeof(KEYUSE), 20, 64))
return 1;
/* Swap the current and the backup keyuse arrays. */
tmp_keyuse= keyuse;
keyuse= (*save_keyuse);
(*save_keyuse)= tmp_keyuse;
for (uint i= 0; i < tables; i++)
{
save_join_tab_keyuse[i]= join_tab[i].keyuse;
join_tab[i].keyuse= NULL;
save_join_tab_checked_keys[i]= join_tab[i].checked_keys;
join_tab[i].checked_keys.clear_all();
}
}
memcpy((uchar*) save_best_positions, (uchar*) best_positions,
sizeof(POSITION) * (tables + 1));
memset(best_positions, 0, sizeof(POSITION) * (tables + 1));
return 0;
}
/**
Restore the query plan saved before reoptimization with additional
conditions.
*/
void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse,
POSITION *save_best_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys)
{
if (save_keyuse->elements)
{
DYNAMIC_ARRAY tmp_keyuse;
tmp_keyuse= keyuse;
keyuse= (*save_keyuse);
(*save_keyuse)= tmp_keyuse;
delete_dynamic(save_keyuse);
for (uint i= 0; i < tables; i++)
{
join_tab[i].keyuse= save_join_tab_keyuse[i];
join_tab[i].checked_keys= save_join_tab_checked_keys[i];
}
}
memcpy((uchar*) best_positions, (uchar*) save_best_positions,
sizeof(POSITION) * (tables + 1));
}
/**
Reoptimize a query plan taking into account an additional conjunct to the
WHERE clause.
*/
int JOIN::reoptimize(Item *added_where, table_map join_tables,
POSITION *save_best_positions)
{
DYNAMIC_ARRAY added_keyuse;
SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */
if (my_init_dynamic_array(&added_keyuse, sizeof(KEYUSE), 20, 64))
{
delete_dynamic(&added_keyuse);
return 1;
}
/* Re-run the REF optimizer to take into account the new conditions. */
if (update_ref_and_keys(thd, &added_keyuse, join_tab, tables, added_where,
~outer_join, select_lex, &sargables))
{
delete_dynamic(&added_keyuse);
return 1;
}
if (!added_keyuse.elements)
{
/* No need to optimize if no new access methods were discovered. */
if (save_best_positions)
memcpy((uchar*) best_positions, (uchar*) save_best_positions,
sizeof(POSITION) * (tables + 1));
delete_dynamic(&added_keyuse);
return 0;
}
/* Add the new access methods to the keyuse array. */
if (!keyuse.buffer &&
my_init_dynamic_array(&keyuse, sizeof(KEYUSE), 20, 64))
{
delete_dynamic(&added_keyuse);
return 1;
}
allocate_dynamic(&keyuse, keyuse.elements + added_keyuse.elements);
memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element,
added_keyuse.buffer,
(size_t) added_keyuse.elements * added_keyuse.size_of_element);
keyuse.elements+= added_keyuse.elements;
delete_dynamic(&added_keyuse);
if (sort_and_filter_keyuse(&keyuse))
return 1;
optimize_keyuse(this, &keyuse);
/* Re-run the join optimizer to compute a new query plan. */
if (choose_plan(this, join_tables))
return 1;
return 0;
}
/** /**
@} (end of group Query_Optimizer) @} (end of group Query_Optimizer)
*/ */

View File

@@ -1369,8 +1369,21 @@ inline bool sj_is_materialize_strategy(uint strategy)
class JOIN :public Sql_alloc class JOIN :public Sql_alloc
{ {
private:
JOIN(const JOIN &rhs); /**< not implemented */ JOIN(const JOIN &rhs); /**< not implemented */
JOIN& operator=(const JOIN &rhs); /**< not implemented */ JOIN& operator=(const JOIN &rhs); /**< not implemented */
protected:
/* Support for plan reoptimization with rewritten conditions. */
int reoptimize(Item *added_where, table_map join_tables,
POSITION *save_best_positions);
int save_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys);
void restore_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys);
public: public:
JOIN_TAB *join_tab,**best_ref; JOIN_TAB *join_tab,**best_ref;
JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs
@@ -1746,7 +1759,7 @@ public:
NULL : join_tab+const_tables; NULL : join_tab+const_tables;
} }
bool setup_subquery_caches(); bool setup_subquery_caches();
bool choose_subquery_plan(); bool choose_subquery_plan(table_map join_tables);
private: private:
/** /**
TRUE if the query contains an aggregate function but has no GROUP TRUE if the query contains an aggregate function but has no GROUP