1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-30 16:24:05 +03:00

MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation

Phase 3: Implementation of re-optimization of subqueries with injected predicates
           and cost comparison between Materialization and IN->EXISTS strategies.

The commit contains the following known problems:
- The implementation of EXPLAIN has not been re-engineered to reflect the
  changes in subquery optimization. EXPLAIN for subqueries is called during
  the execute phase, which results in different code paths during JOIN::optimize
  and thus in differing EXPLAIN messages for constant/system tables.
- There are some valgrind warnings that need investigation
- Several EXPLAINs with minor differences need to be reconsidered after fixing
  the EXPLAIN problem above.

This patch also adds one extra optimizer_switch: 'in_to_exists' for complete
manual control of the subquery execution strategies.
This commit is contained in:
unknown
2010-09-30 18:32:44 +03:00
parent 50888477af
commit 8ec5e13f1f
12 changed files with 600 additions and 454 deletions

View File

@ -840,7 +840,12 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22)
1 0 0
2 0 0
11 0 0
# 2nd and 3rd columns should be same for x == 11 only
# 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 3
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12)
1 0 0

View File

@ -844,11 +844,16 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22)
1 0 0
2 0 0
11 0 0
# 2nd and 3rd columns should be same for x == 11 only
# 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 3
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12)
1 0 1
2 0 1
1 0 0
2 0 0
11 1 1
DROP TABLE t1;
# both columns should be same

View File

@ -681,7 +681,8 @@ SELECT a, ROW(11, 12) = (SELECT a, 12), ROW(11, 12) IN (SELECT a, 12) FROM t1;
# The x alias is used below to workaround bug #40674.
# Regression tests for sum function on outer column in subselect from dual:
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 22), ROW(11, 12) IN (SELECT MAX(x), 22) FROM t1;
--echo # 2nd and 3rd columns should be same for x == 11 only
--echo # 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
DROP TABLE t1;

View File

@ -62,7 +62,6 @@ void Item_subselect::init(st_select_lex *select_lex,
DBUG_PRINT("enter", ("select_lex: 0x%lx", (long) select_lex));
unit= select_lex->master_unit();
thd= unit->thd;
is_min_max_optimized= FALSE;
if (unit->item)
{
/*
@ -160,8 +159,8 @@ void Item_in_subselect::cleanup()
}
first_execution= TRUE;
is_constant= FALSE;
if (exec_method == MATERIALIZATION)
exec_method= NOT_TRANSFORMED;
if (in_strategy & SUBS_MATERIALIZATION)
in_strategy= 0;
pushed_cond_guards= NULL;
Item_subselect::cleanup();
DBUG_VOID_RETURN;
@ -618,7 +617,7 @@ bool Item_in_subselect::exec()
- on a cost-based basis, that takes into account the cost of a cache
lookup, the cache hit rate, and the savings per cache hit.
*/
if (!left_expr_cache && exec_method == MATERIALIZATION)
if (!left_expr_cache && (in_strategy & SUBS_MATERIALIZATION))
init_left_expr_cache();
/*
@ -1072,7 +1071,7 @@ Item_in_subselect::Item_in_subselect(Item * left_exp,
st_select_lex *select_lex):
Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
is_constant(FALSE), optimizer(0), pushed_cond_guards(NULL),
exec_method(NOT_TRANSFORMED), upper_item(0)
in_strategy(0), upper_item(0)
{
DBUG_ENTER("Item_in_subselect::Item_in_subselect");
left_expr= left_exp;
@ -1488,7 +1487,6 @@ Item_in_subselect::single_value_transformer(JOIN *join)
}
/* fix fields is already called for left expression */
substitution= func->create(left_expr, subs);
is_min_max_optimized= TRUE;
DBUG_RETURN(RES_OK);
}
@ -1555,8 +1553,21 @@ Item_in_subselect::single_value_transformer(JOIN *join)
}
bool Item_in_subselect::fix_having(Item *having, SELECT_LEX *select_lex)
{
bool fix_res= 0;
if (!having->fixed)
{
select_lex->having_fix_field= 1;
fix_res= having->fix_fields(thd, 0);
select_lex->having_fix_field= 0;
}
return fix_res;
}
/**
Transofrm an IN predicate into EXISTS via predicate injection.
Transform an IN predicate into EXISTS via predicate injection.
@details The transformation injects additional predicates into the subquery
(and makes the subquery correlated) as follows.
@ -1607,6 +1618,9 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
DBUG_ENTER("Item_in_subselect::create_single_in_to_exists_cond");
*where_item= NULL;
*having_item= NULL;
if (join_having || select_lex->with_sum_func ||
select_lex->group_list.elements)
{
@ -1626,11 +1640,11 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
item= new Item_func_trig_cond(item, get_cond_guard(0));
}
if (item->fix_fields(thd, 0))
if (!join_having)
item->name= (char*) in_having_cond;
if (fix_having(item, select_lex))
DBUG_RETURN(RES_ERROR);
*having_item= item;
*where_item= NULL;
}
else
{
@ -1651,10 +1665,9 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
get_cond_guard(0))))
DBUG_RETURN(RES_ERROR);
}
if (having->fix_fields(thd, 0))
DBUG_RETURN(RES_ERROR);
having->name= (char*) in_having_cond;
if (fix_having(having, select_lex))
DBUG_RETURN(RES_ERROR);
*having_item= having;
item= new Item_cond_or(item,
@ -1670,20 +1683,20 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
DBUG_RETURN(RES_ERROR);
}
if (item->fix_fields(thd, 0))
DBUG_RETURN(RES_ERROR);
/*
TODO: figure out why the following is done here in
single_value_transformer but there is no corresponding action in
row_value_transformer?
*/
item->name= (char *) in_additional_cond;
if (!item->fixed && item->fix_fields(thd, 0))
DBUG_RETURN(RES_ERROR);
*where_item= item;
}
else
{
if (select_lex->master_unit()->is_union())
{
/*
comparison functions can't be changed during fix_fields()
we can assign select_lex->having here, and pass 0 as last
argument (reference) to fix_fields()
*/
Item *new_having=
func->create(expr,
new Item_ref_null_helper(&select_lex->context, this,
@ -1697,132 +1710,10 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
DBUG_RETURN(RES_ERROR);
}
if (new_having->fix_fields(thd, 0))
DBUG_RETURN(RES_ERROR);
new_having->name= (char*) in_having_cond;
if (fix_having(new_having, select_lex))
DBUG_RETURN(RES_ERROR);
*having_item= new_having;
*where_item= NULL;
}
else
{
DBUG_ASSERT(FALSE);
/* TIMOUR TODO */
*having_item= NULL;
*where_item= (Item*) select_lex->item_list.head();
}
}
}
DBUG_RETURN(RES_OK);
}
Item_subselect::trans_res
Item_in_subselect::inject_single_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item)
{
SELECT_LEX *select_lex= join->select_lex;
/*
The non-transformed HAVING clause of 'join' may be stored differently in
JOIN::optimize:
this->tmp_having= this->having
this->having= 0;
*/
Item* join_having= join->having ? join->having : join->tmp_having;
bool fix_res= 0;
DBUG_ENTER("Item_in_subselect::inject_single_in_to_exists_cond");
if (join_having || select_lex->with_sum_func ||
select_lex->group_list.elements)
{
/*
AND and comparison functions can't be changed during fix_fields()
we can assign select_lex->having here, and pass 0 as last
argument (reference) to fix_fields()
*/
thd->change_item_tree(&select_lex->having, and_items(join_having, having_item));
join->having= select_lex->having;
if (join->having == having_item)
having_item->name= (char*)in_having_cond;
select_lex->having_fix_field= 1;
/*
we do not check join->having->fixed, because Item_and (from and_items)
or comparison function (from func->create) can't be fixed after creation
*/
if (!join->having->fixed)
fix_res= join->having->fix_fields(thd, 0);
select_lex->having_fix_field= 0;
if (fix_res)
DBUG_RETURN(RES_ERROR);
}
else
{
if (select_lex->table_list.elements)
{
if (!abort_on_null && select_lex->item_list.head()->maybe_null)
{
/*
Item_is_not_null_test can't be changed during fix_fields()
we can assign select_lex->having here, and pass 0 as last
argument (reference) to fix_fields()
*/
having_item->name= (char*)in_having_cond;
thd->change_item_tree(&select_lex->having, having_item);
join->having= select_lex->having;
select_lex->having_fix_field= 1;
/*
we do not check join->having->fixed, because Item_and (from
and_items) or comparison function (from func->create) can't be
fixed after creation
*/
if (!join->having->fixed)
fix_res= join->having->fix_fields(thd, 0);
select_lex->having_fix_field= 0;
if (fix_res)
DBUG_RETURN(RES_ERROR);
}
/*
TODO: figure out why the following is done here in
single_value_transformer but there is no corresponding action in
row_value_transformer?
*/
where_item->name= (char *)in_additional_cond;
/*
AND can't be changed during fix_fields()
we can assign select_lex->having here, and pass 0 as last
argument (reference) to fix_fields()
*/
thd->change_item_tree(&select_lex->where, and_items(join->conds, where_item));
join->conds= select_lex->where;
select_lex->where->top_level_item();
/*
we do not check join->conds->fixed, because Item_and can't be fixed
after creation
*/
if (!join->conds->fixed && join->conds->fix_fields(thd, 0))
DBUG_RETURN(RES_ERROR);
}
else
{
if (select_lex->master_unit()->is_union())
{
having_item->name= (char*)in_having_cond;
thd->change_item_tree(&select_lex->having, having_item);
join->having= select_lex->having;
select_lex->having_fix_field= 1;
/*
we do not check join->having->fixed, because comparison function
(from func->create) can't be fixed after creation
*/
if (!join->having->fixed)
fix_res= join->having->fix_fields(thd, 0);
select_lex->having_fix_field= 0;
if (fix_res)
DBUG_RETURN(RES_ERROR);
}
else
DBUG_ASSERT(FALSE);
@ -1992,7 +1883,6 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
item_having_part2->top_level_item();
}
*having_item= and_items(*having_item, item_having_part2);
(*having_item)->top_level_item();
}
else
{
@ -2074,66 +1964,22 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
}
*where_item= and_items(*where_item, item);
}
(*where_item)->fix_fields(thd, 0);
}
DBUG_RETURN(RES_OK);
}
Item_subselect::trans_res
Item_in_subselect::inject_row_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item)
{
SELECT_LEX *select_lex= join->select_lex;
/*
The non-transformed HAVING clause of 'join' may be stored differently in
JOIN::optimize:
this->tmp_having= this->having
this->having= 0;
*/
Item* join_having= join->having ? join->having : join->tmp_having;
bool is_having_used= (join_having || select_lex->with_sum_func ||
select_lex->group_list.first ||
!select_lex->table_list.elements);
DBUG_ENTER("Item_in_subselect::inject_row_in_to_exists_cond");
if (!is_having_used)
if (*where_item)
{
/*
AND can't be changed during fix_fields()
we can assign select_lex->where here, and pass 0 as last
argument (reference) to fix_fields()
*/
thd->change_item_tree(&select_lex->where, and_items(join->conds, where_item));
join->conds= select_lex->where;
select_lex->where->top_level_item();
if (!join->conds->fixed && join->conds->fix_fields(thd, 0))
if (!(*where_item)->fixed && (*where_item)->fix_fields(thd, 0))
DBUG_RETURN(RES_ERROR);
(*where_item)->top_level_item();
}
if (having_item)
if (*having_item)
{
bool res;
thd->change_item_tree(&select_lex->having,
and_items(join_having, having_item));
join->having= select_lex->having;
if (having_item == select_lex->having)
having_item->name= (char*)in_having_cond;
select_lex->having->top_level_item();
/*
AND can't be changed during fix_fields()
we can assign select_lex->having here, and pass 0 as last
argument (reference) to fix_fields()
*/
select_lex->having_fix_field= 1;
if (!join->having->fixed)
res= join->having->fix_fields(thd, 0);
select_lex->having_fix_field= 0;
if (res)
if (!join_having)
(*having_item)->name= (char*) in_having_cond;
if (fix_having(*having_item, select_lex))
DBUG_RETURN(RES_ERROR);
(*having_item)->top_level_item();
}
DBUG_RETURN(RES_OK);
@ -2175,20 +2021,36 @@ bool Item_in_subselect::create_in_to_exists_cond(JOIN *join_arg)
}
bool Item_in_subselect::inject_in_to_exists_cond(JOIN * join_arg)
bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg)
{
Item_subselect::trans_res res;
SELECT_LEX *select_lex= join_arg->select_lex;
Item *where_item= join_arg->in_to_exists_where;
Item *having_item= join_arg->in_to_exists_having;
if (left_expr->cols() == 1)
res= inject_single_in_to_exists_cond(join_arg,
join_arg->in_to_exists_where,
join_arg->in_to_exists_having);
else
res= inject_row_in_to_exists_cond(join_arg,
join_arg->in_to_exists_where,
join_arg->in_to_exists_having);
DBUG_ENTER("Item_in_subselect::inject_in_to_exists_cond");
return (res != RES_OK && res != RES_REDUCE);
if (where_item)
{
where_item= and_items(join_arg->conds, where_item);
if (!where_item->fixed && where_item->fix_fields(thd, 0))
DBUG_RETURN(true);
thd->change_item_tree(&select_lex->where, where_item);
select_lex->where->top_level_item();
join_arg->conds= select_lex->where;
}
if (having_item)
{
Item* join_having= join_arg->having ? join_arg->having:join_arg->tmp_having;
having_item= and_items(join_having, having_item);
if (fix_having(having_item, select_lex))
DBUG_RETURN(true);
thd->change_item_tree(&select_lex->having, having_item);
select_lex->having->top_level_item();
join_arg->having= select_lex->having;
}
DBUG_RETURN(false);
}
@ -2301,7 +2163,7 @@ err:
void Item_in_subselect::print(String *str, enum_query_type query_type)
{
if (exec_method == IN_TO_EXISTS)
if (in_strategy & SUBS_IN_TO_EXISTS)
str->append(STRING_WITH_LEN("<exists>"));
else
{
@ -2317,7 +2179,7 @@ bool Item_in_subselect::fix_fields(THD *thd_arg, Item **ref)
uint outer_cols_num;
List<Item> *inner_cols;
if (exec_method == SEMI_JOIN)
if (in_strategy & SUBS_SEMI_JOIN)
return !( (*ref)= new Item_int(1));
/*
@ -2498,10 +2360,7 @@ bool Item_in_subselect::init_cond_guards()
Callback to test if an IN predicate is expensive.
@details
IN predicates are considered expensive only if they will be executed via
materialization. The return value affects the behavior of
make_cond_for_table() in such a way that it is unchanged when we use
the IN=>EXISTS transformation to compute IN.
The return value affects the behavior of make_cond_for_table().
@retval TRUE if the predicate is expensive
@retval FALSE otherwise
@ -2509,7 +2368,8 @@ bool Item_in_subselect::init_cond_guards()
bool Item_in_subselect::is_expensive_processor(uchar *arg)
{
return TRUE; // exec_method == MATERIALIZATION;
/* TIMOUR: TODO: decide on a cost basis whether it is expensive or not. */
return TRUE;
}
@ -2517,7 +2377,7 @@ Item_subselect::trans_res
Item_allany_subselect::select_transformer(JOIN *join)
{
DBUG_ENTER("Item_allany_subselect::select_transformer");
exec_method= IN_TO_EXISTS;
in_strategy= SUBS_IN_TO_EXISTS;
if (upper_item)
upper_item->show= 1;
DBUG_RETURN(select_in_like_transformer(join));
@ -2526,7 +2386,7 @@ Item_allany_subselect::select_transformer(JOIN *join)
void Item_allany_subselect::print(String *str, enum_query_type query_type)
{
if (exec_method == IN_TO_EXISTS)
if (in_strategy & SUBS_IN_TO_EXISTS)
str->append(STRING_WITH_LEN("<exists>"));
else
{

View File

@ -108,9 +108,6 @@ public:
/* subquery is transformed */
bool changed;
/* TIMOUR: this is temporary, remove it. */
bool is_min_max_optimized;
/* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */
bool is_correlated;
@ -121,6 +118,12 @@ public:
Item_subselect();
virtual subs_type substype() { return UNKNOWN_SUBS; }
bool is_in_predicate()
{
return (substype() == Item_subselect::IN_SUBS ||
substype() == Item_subselect::ALL_SUBS ||
substype() == Item_subselect::ANY_SUBS);
}
/*
We need this method, because some compilers do not allow 'this'
@ -314,6 +317,18 @@ public:
};
/*
Possible methods to execute an IN predicate. These are set by the optimizer
based on user-set optimizer switches, syntactic analysis and cost comparison.
*/
#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */
#define SUBS_SEMI_JOIN 1 /* IN was converted to semi-join. */
#define SUBS_IN_TO_EXISTS 2 /* IN was converted to correlated EXISTS. */
#define SUBS_MATERIALIZATION 4 /* Execute IN via subquery materialization. */
/* Partial matching substrategies of MATERIALIZATION. */
#define SUBS_PARTIAL_MATCH_ROWID_MERGE 8
#define SUBS_PARTIAL_MATCH_TABLE_SCAN 16
/**
Representation of IN subquery predicates of the form
"left_expr IN (SELECT ...)".
@ -362,19 +377,13 @@ protected:
trans_res select_in_like_transformer(JOIN *join);
trans_res single_value_transformer(JOIN *join);
trans_res row_value_transformer(JOIN * join);
bool fix_having(Item *having, st_select_lex *select_lex);
trans_res create_single_in_to_exists_cond(JOIN * join,
Item **where_item,
Item **having_item);
trans_res inject_single_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item);
trans_res create_row_in_to_exists_cond(JOIN * join,
Item **where_item,
Item **having_item);
trans_res inject_row_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item);
public:
Item *left_expr;
/* Priority of this predicate in the convert-to-semi-join-nest process. */
@ -407,14 +416,8 @@ public:
*/
bool sjm_scan_allowed;
/* The method chosen to execute the IN predicate. */
enum enum_exec_method {
NOT_TRANSFORMED, /* No execution method was chosen for this IN. */
SEMI_JOIN, /* IN was converted to semi-join nest and should be removed. */
IN_TO_EXISTS, /* IN was converted to correlated EXISTS. */
MATERIALIZATION /* IN will be executed via subquery materialization. */
};
enum_exec_method exec_method;
/* A bitmap of possible execution strategies for an IN predicate. */
uchar in_strategy;
bool *get_cond_guard(int i)
{
@ -433,7 +436,7 @@ public:
Item_in_subselect()
:Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
is_constant(FALSE), optimizer(0), abort_on_null(0),
pushed_cond_guards(NULL), func(NULL), exec_method(NOT_TRANSFORMED),
pushed_cond_guards(NULL), func(NULL), in_strategy(0),
upper_item(0)
{}
void cleanup();
@ -446,8 +449,8 @@ public:
was_null= 0;
}
trans_res select_transformer(JOIN *join);
bool create_in_to_exists_cond(JOIN * join_arg);
bool inject_in_to_exists_cond(JOIN * join_arg);
bool create_in_to_exists_cond(JOIN *join_arg);
bool inject_in_to_exists_cond(JOIN *join_arg);
virtual bool exec();
longlong val_int();

View File

@ -563,20 +563,20 @@ protected:
#define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4
#define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8
#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 16
#define OPTIMIZER_SWITCH_FIRSTMATCH 32
#define OPTIMIZER_SWITCH_LOOSE_SCAN 64
#define OPTIMIZER_SWITCH_MATERIALIZATION 128
#define OPTIMIZER_SWITCH_SEMIJOIN 256
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024
#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<11)
#define OPTIMIZER_SWITCH_IN_TO_EXISTS 256
#define OPTIMIZER_SWITCH_SEMIJOIN 512
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 1024
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN (1<<11)
#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<12)
#ifdef DBUG_OFF
# define OPTIMIZER_SWITCH_LAST (1<<12)
#else
# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<12)
# define OPTIMIZER_SWITCH_LAST (1<<13)
#else
# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<13)
# define OPTIMIZER_SWITCH_LAST (1<<14)
#endif
#ifdef DBUG_OFF
@ -589,6 +589,7 @@ protected:
OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
@ -603,6 +604,7 @@ protected:
OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\

View File

@ -304,7 +304,7 @@ static const char *optimizer_switch_names[]=
"index_merge","index_merge_union","index_merge_sort_union",
"index_merge_intersection",
"index_condition_pushdown",
"firstmatch","loosescan","materialization", "semijoin",
"firstmatch","loosescan","materialization","in_to_exists","semijoin",
"partial_match_rowid_merge",
"partial_match_table_scan",
"subquery_cache",
@ -325,6 +325,7 @@ static const unsigned int optimizer_switch_names_len[]=
sizeof("firstmatch") - 1,
sizeof("loosescan") - 1,
sizeof("materialization") - 1,
sizeof("in_to_exists") - 1,
sizeof("semijoin") - 1,
sizeof("partial_match_rowid_merge") - 1,
sizeof("partial_match_table_scan") - 1,
@ -412,9 +413,10 @@ static const char *optimizer_switch_str="index_merge=on,index_merge_union=on,"
"firstmatch=on,"
"loosescan=on,"
"materialization=on,"
"in_to_exists=on,"
"semijoin=on,"
"partial_match_rowid_merge=on,"
"partial_match_table_scan=on,"
"partial_match_table_scan=on,"
"subquery_cache=on"
#ifndef DBUG_OFF
",table_elimination=on";
@ -7233,7 +7235,7 @@ The minimum value for this variable is 4096.",
{"optimizer_switch", OPT_OPTIMIZER_SWITCH,
"optimizer_switch=option=val[,option=val...], where option={index_merge, "
"index_merge_union, index_merge_sort_union, index_merge_intersection, "
"index_condition_pushdown, firstmatch, loosescan, materialization, "
"index_condition_pushdown, firstmatch, loosescan, materialization, in_to_exists, "
"semijoin, partial_match_rowid_merge, partial_match_table_scan, "
"subquery_cache"
#ifndef DBUG_OFF

View File

@ -165,7 +165,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
thd->thd_marker.emb_on_expr_nest && // 5
select_lex->outer_select()->join && // 6
parent_unit->first_select()->leaf_tables && // 7
in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED && // 8
!in_subs->in_strategy && // 8
select_lex->outer_select()->leaf_tables && // 9
!((join->select_options | // 10
select_lex->outer_select()->join->select_options) // 10
@ -185,65 +185,71 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
else
{
DBUG_PRINT("info", ("Subquery can't be converted to semi-join"));
/*
Check if the subquery predicate can be executed via materialization.
The required conditions are:
1. Subquery predicate is an IN/=ANY subq predicate
2. Subquery is a single SELECT (not a UNION)
3. Subquery is not a table-less query. In this case there is no
point in materializing.
3A The upper query is not a table-less SELECT ... FROM DUAL. We
if (!optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) &&
!optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0));
if (in_subs)
{
/* Subquery predicate is an IN/=ANY predicate. */
if (optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS))
in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
in_subs->in_strategy|= SUBS_MATERIALIZATION;
/*
Check if the subquery predicate can be executed via materialization.
The required conditions are:
1. Subquery is a single SELECT (not a UNION)
2. Subquery is not a table-less query. In this case there is no
point in materializing.
2A The upper query is not a table-less SELECT ... FROM DUAL. We
can't do materialization for SELECT .. FROM DUAL because it
does not call setup_subquery_materialization(). We could make
SELECT ... FROM DUAL call that function but that doesn't seem
to be the case that is worth handling.
4. Either the subquery predicate is a top-level predicate, or at
least one partial match strategy is enabled. If no partial match
strategy is enabled, then materialization cannot be used for
non-top-level queries because it cannot handle NULLs correctly.
5. Subquery is non-correlated
TODO:
This is an overly restrictive condition. It can be extended to:
(Subquery is non-correlated ||
Subquery is correlated to any query outer to IN predicate ||
(Subquery is correlated to the immediate outer query &&
Subquery !contains {GROUP BY, ORDER BY [LIMIT],
aggregate functions}) && subquery predicate is not under "NOT IN"))
6. No execution method was already chosen (by a prepared statement).
3. Either the subquery predicate is a top-level predicate, or at
least one partial match strategy is enabled. If no partial match
strategy is enabled, then materialization cannot be used for
non-top-level queries because it cannot handle NULLs correctly.
4. Subquery is non-correlated
TODO:
This is an overly restrictive condition. It can be extended to:
(Subquery is non-correlated ||
Subquery is correlated to any query outer to IN predicate ||
(Subquery is correlated to the immediate outer query &&
Subquery !contains {GROUP BY, ORDER BY [LIMIT],
aggregate functions}) && subquery predicate is not under "NOT IN"))
(*) The subquery must be part of a SELECT statement. The current
condition also excludes multi-table update statements.
Determine whether we will perform subquery materialization before
calling the IN=>EXISTS transformation, so that we know whether to
perform the whole transformation or only that part of it which wraps
Item_in_subselect in an Item_in_optimizer.
*/
if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION) &&
in_subs && // 1
!select_lex->is_part_of_union() && // 2
parent_unit->first_select()->leaf_tables && // 3
thd->lex->sql_command == SQLCOM_SELECT && // *
select_lex->outer_select()->leaf_tables && // 3A
subquery_types_allow_materialization(in_subs) &&
// psergey-todo: duplicated_subselect_card_check: where it's done?
(in_subs->is_top_level_item() ||
optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) ||
optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) &&//4
!in_subs->is_correlated && // 5
in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6
{
/*
Materialization is possible, later the optimize phase of each
subquery will choose either materialization or in-to-exists based
on cost.
(*) The subquery must be part of a SELECT statement. The current
condition also excludes multi-table update statements.
*/
in_subs->exec_method= Item_in_subselect::MATERIALIZATION;
}
else if (in_subs)
{
/* Materialization is not possible at all. */
in_subs->exec_method= Item_in_subselect::IN_TO_EXISTS;
if (!(in_subs->in_strategy & SUBS_MATERIALIZATION &&
!select_lex->is_part_of_union() && // 1
parent_unit->first_select()->leaf_tables && // 2
thd->lex->sql_command == SQLCOM_SELECT && // *
select_lex->outer_select()->leaf_tables && // 2A
subquery_types_allow_materialization(in_subs) &&
// psergey-todo: duplicated_subselect_card_check: where it's done?
(in_subs->is_top_level_item() || //3
optimizer_flag(thd,
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || //3
optimizer_flag(thd,
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) && //3
!in_subs->is_correlated)) //4
{
/* Materialization is not possible based on syntactic properties. */
in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
}
if (!in_subs->in_strategy)
{
/*
If neither materialization is possible, nor the user chose
IN-TO-EXISTS, choose IN-TO-EXISTS as the only universal strategy.
*/
in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
}
}
/*
@ -533,13 +539,13 @@ skip_conversion:
}
/*
Revert to the IN->EXISTS strategy in the rare case when the subquery could
be flattened.
not be flattened.
TODO: This is a limitation done for simplicity. Such subqueries could also
be executed via materialization. In order to determine this, we should
re-run the test for materialization that was done in
check_and_do_in_subquery_rewrites.
*/
(*in_subq)->exec_method= Item_in_subselect::IN_TO_EXISTS;
(*in_subq)->in_strategy= SUBS_IN_TO_EXISTS;
}
if (arena)
@ -800,8 +806,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
/* 3. Remove the original subquery predicate from the WHERE/ON */
// The subqueries were replaced for Item_int(1) earlier
subq_pred->exec_method=
Item_in_subselect::SEMI_JOIN; // for subsequent executions
subq_pred->in_strategy= SUBS_SEMI_JOIN; // for subsequent executions
/*TODO: also reset the 'with_subselect' there. */
/* n. Adjust the parent_join->tables counter */
@ -3393,6 +3398,7 @@ int rewrite_to_index_subquery_engine(JOIN *join)
JOIN_TAB* join_tab=join->join_tab;
SELECT_LEX_UNIT *unit= join->unit;
DBUG_ENTER("rewrite_to_index_subquery_engine");
/*
is this simple IN subquery?
*/
@ -3564,56 +3570,157 @@ bool JOIN::optimize_unflattened_subqueries()
}
bool JOIN::choose_subquery_plan()
bool JOIN::choose_subquery_plan(table_map join_tables)
{
double mat_strategy_cost; /* The cost to compute IN via materialization. */
double in_exists_strategy_cost; /* The cost of the IN->EXISTS strategy. */
bool res;
/* The original QEP of the subquery. */
DYNAMIC_ARRAY save_keyuse; /* Copy of the JOIN::keyuse array. */
POSITION save_best_positions[MAX_TABLES+1]; /* Copy of JOIN::best_positions */
/* Copies of the JOIN_TAB::keyuse pointers for each JOIN_TAB. */
KEYUSE *save_join_tab_keyuse[MAX_TABLES];
/* Copies of JOIN_TAB::checked_keys for each JOIN_TAB. */
key_map save_join_tab_checked_keys[MAX_TABLES];
DBUG_ASSERT(in_to_exists_where || in_to_exists_having);
DBUG_ASSERT(select_lex->master_unit()->item &&
(select_lex->master_unit()->item->substype() ==
Item_subselect::IN_SUBS ||
select_lex->master_unit()->item->substype() ==
Item_subselect::ALL_SUBS ||
select_lex->master_unit()->item->substype() ==
Item_subselect::ANY_SUBS));
bool in_exists_reoptimized= false;
Item_in_subselect *in_subs;
Item_in_subselect *in_subs= (Item_in_subselect*)
select_lex->master_unit()->item;
/* Always revert to IN->EXISTS. */
mat_strategy_cost= 0;
in_exists_strategy_cost= 1;
/*
If (1) materialization is a possible strategy based on static analysis, and
(2) it is cheaper strategy than the IN->EXISTS transformation, then compute
in_subs via the materialization trategy.
*/
if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION && // 1
mat_strategy_cost < in_exists_strategy_cost) // 2
if (select_lex->master_unit()->item &&
select_lex->master_unit()->item->is_in_predicate())
{
if (in_subs->setup_mat_engine())
{
/*
In some cases it is not possible to create usable indexes for the
materialization strategy, so fall back to IN->EXISTS.
*/
in_subs->exec_method= Item_in_subselect::IN_TO_EXISTS;
}
in_subs= (Item_in_subselect*) select_lex->master_unit()->item;
if (in_subs->create_in_to_exists_cond(this))
return true;
}
else
return false;
DBUG_ASSERT(in_subs->in_strategy); /* A strategy must be chosen earlier. */
DBUG_ASSERT(in_to_exists_where || in_to_exists_having);
DBUG_ASSERT(!in_to_exists_where || in_to_exists_where->fixed);
DBUG_ASSERT(!in_to_exists_having || in_to_exists_having->fixed);
save_keyuse.elements= 0;
save_keyuse.buffer= NULL;
/*
Compute and compare the costs of materialization and in-exists if both
strategies are possible and allowed by the user (checked during the prepare
phase.
*/
if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
in_subs->in_strategy & SUBS_IN_TO_EXISTS)
{
JOIN *outer_join= unit->outer_select() ? unit->outer_select()->join : NULL;
JOIN *inner_join= this;
/* Cost of the outer JOIN. */
double outer_read_time= 0, outer_record_count= 0;
/* Cost of the unmodified subquery. */
double inner_read_time_1= 0, inner_record_count_1= 0;
/* Cost of the subquery with injected IN-EXISTS predicates. */
double inner_read_time_2= 0, inner_record_count_2= 0;
/* The cost to compute IN via materialization. */
double materialize_strategy_cost= 0;
/* The cost of the IN->EXISTS strategy. */
double in_exists_strategy_cost= 1;
if (outer_join)
get_partial_join_cost(outer_join, outer_join->tables,
&outer_read_time, &outer_record_count);
else
{
/* TODO: outer_join can be NULL for DELETE statements. */
outer_read_time= 1; /* TODO */
outer_record_count= 1; /* TODO */
}
get_partial_join_cost(inner_join, inner_join->tables,
&inner_read_time_1, &inner_record_count_1);
if (in_to_exists_where && const_tables != tables)
{
/*
Re-optimize and cost the subquery taking into account the IN-EXISTS
conditions.
*/
if (save_query_plan(&save_keyuse, save_best_positions,
save_join_tab_keyuse, save_join_tab_checked_keys) ||
reoptimize(in_to_exists_where, join_tables, save_best_positions))
return TRUE;
in_exists_reoptimized= true;
get_partial_join_cost(inner_join, inner_join->tables,
&inner_read_time_2, &inner_record_count_2);
}
else
{
/* Reoptimization would not produce any better plan. */
inner_read_time_2= inner_read_time_1;
}
/* Compute execution costs. */
/*
Previous optimizer phases should have chosen either a materialization
or IN->EXISTS strategy.
1. Compute the cost of the materialization strategy.
*/
DBUG_ASSERT(in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS);
double materialization_cost; /* The cost of executing the subquery and */
/* storing its result in an indexed temp table.*/
/* The cost of a lookup into the unique index of the materialized table. */
double lookup_cost;
double write_row_cost= 1; /* TODO: what is the real cost to write a row? */
materialization_cost= inner_read_time_1 +
inner_record_count_1 * write_row_cost;
/*
The cost of a hash/btree lookup into a unique index of a materialized
subquery.
TIMOUR: TODO: the block of code below is exact copy/paste from
opt_subselect.cc:optimize_semi_join_nests() - refactor it.
*/
uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list);
if (rowlen * inner_record_count_1 < thd->variables.max_heap_table_size)
lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST;
else
lookup_cost= DISK_TEMPTABLE_LOOKUP_COST;
materialize_strategy_cost= materialization_cost +
outer_record_count * lookup_cost;
/*
2. Compute the cost of the IN=>EXISTS strategy.
*/
in_exists_strategy_cost= outer_record_count * inner_read_time_2;
/* Compare the costs and choose the cheaper strategy. */
if (materialize_strategy_cost >= in_exists_strategy_cost)
in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
else
in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS;
}
if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION)
/*
If (1) materialization is a possible strategy based on static analysis
during the prepare phase, then if
(2) it is more expensive than the IN->EXISTS transformation, and
(3) it is not possible to create usable indexes for the materialization
strategy,
fall back to IN->EXISTS.
otherwise use materialization.
*/
if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
in_subs->setup_mat_engine())
{
/*
If materialization was the cheaper or the only user-selected strategy,
but it is not possible to execute it due to limitations in the
implementation, fall back to IN-TO-EXISTS.
*/
in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
}
if (in_subs->in_strategy & SUBS_MATERIALIZATION)
{
/* Restore orginal query plan used for materialization. */
if (in_exists_reoptimized)
restore_query_plan(&save_keyuse, save_best_positions,
save_join_tab_keyuse, save_join_tab_checked_keys);
/* TODO: should we set/unset this flag for both select_lex and its unit? */
in_subs->unit->uncacheable&= ~UNCACHEABLE_DEPENDENT;
select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT;
@ -3633,13 +3740,32 @@ bool JOIN::choose_subquery_plan()
set in the beginning of JOIN::optimize, which was already done.
*/
select_limit= in_subs->unit->select_limit_cnt;
// TODO: inner_join->restore_plan (keyuse, best_positions, best_read)
}
else if (in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS)
res= in_subs->inject_in_to_exists_cond(this);
else if (in_subs->in_strategy & SUBS_IN_TO_EXISTS)
{
/* Keep the new query plan with injected conditions, delete the old one. */
if (save_keyuse.elements)
{
DBUG_ASSERT(in_exists_reoptimized);
delete_dynamic(&save_keyuse);
}
if (!in_exists_reoptimized && in_to_exists_where && const_tables != tables)
{
for (uint i= 0; i < tables; i++)
{
join_tab[i].keyuse= NULL;
join_tab[i].checked_keys.clear_all();
}
if (reoptimize(in_to_exists_where, join_tables, NULL))
return TRUE;
}
if (in_subs->inject_in_to_exists_cond(this))
return TRUE;
}
else
DBUG_ASSERT(FALSE);
return res;
return FALSE;
}

View File

@ -6245,3 +6245,5 @@ ER_UNKNOWN_OPTION
eng "Unknown option '%-.64s'"
ER_BAD_OPTION_VALUE
eng "Incorrect value '%-.64s' for option '%-.64s'"
ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES
eng "At least one of the 'in_to_exists' or 'materialization' optimizer_switch flags must be 'on'."

View File

@ -2157,8 +2157,8 @@ void st_select_lex::print_limit(THD *thd,
select_limit == 1, and there should be no offset_limit.
*/
(((subs_type == Item_subselect::IN_SUBS) &&
((Item_in_subselect*)item)->exec_method ==
Item_in_subselect::MATERIALIZATION) ?
((Item_in_subselect*)item)->in_strategy &
SUBS_MATERIALIZATION) ?
TRUE :
(select_limit->val_int() == 1LL) &&
offset_limit == 0));
@ -3096,25 +3096,11 @@ bool st_select_lex::optimize_unflattened_subqueries()
Item_subselect *subquery_predicate= un->item;
if (subquery_predicate)
{
Item_in_subselect *item_in= NULL;
if (subquery_predicate->substype() == Item_subselect::IN_SUBS ||
subquery_predicate->substype() == Item_subselect::ALL_SUBS ||
subquery_predicate->substype() == Item_subselect::ANY_SUBS)
item_in= (Item_in_subselect*) subquery_predicate;
for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
{
JOIN *inner_join= sl->join;
SELECT_LEX *save_select= un->thd->lex->current_select;
int res;
/*
Make sure that we do not create IN->EXISTS conditions for
subquery predicates that were substituted by Item_maxmin_subselect
or by Item_singlerow_subselect.
*/
DBUG_ASSERT(!item_in || (item_in && !item_in->is_min_max_optimized));
if (item_in && item_in->create_in_to_exists_cond(inner_join))
return TRUE;
/* We need only 1 row to determine existence */
un->set_limit(un->global_parameters);
un->thd->lex->current_select= sl;

View File

@ -53,9 +53,9 @@ static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, COND *conds,
static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
JOIN_TAB *join_tab,
uint tables, COND *conds,
COND_EQUAL *cond_equal,
table_map table_map, SELECT_LEX *select_lex,
st_sargable_param **sargables);
static bool sort_and_filter_keyuse(DYNAMIC_ARRAY *keyuse);
static int sort_keyuse(KEYUSE *a,KEYUSE *b);
static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
table_map used_tables);
@ -928,24 +928,29 @@ JOIN::optimize()
error= 0;
if (optimize_unflattened_subqueries())
DBUG_RETURN(1);
if (in_to_exists_where || in_to_exists_having)
{
/*
TIMOUR: TODO: refactor this block and JOIN::choose_subquery_plan
*/
Item_in_subselect *in_subs= (Item_in_subselect*)
select_lex->master_unit()->item;
/*
TIMOUR: TODO: consider do we need to optimize here at all and refactor
this block and JOIN::choose_subquery_plan.
if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION)
; // setup materialized execution structures
else if (in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS)
if (choose_subquery_plan())
DBUG_RETURN(1);
*/
Item_in_subselect *in_subs;
if (select_lex->master_unit()->item &&
select_lex->master_unit()->item->is_in_predicate())
{
in_subs= (Item_in_subselect*) select_lex->master_unit()->item;
if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
in_subs->setup_mat_engine())
in_subs->in_strategy= SUBS_IN_TO_EXISTS;
if (in_subs->in_strategy & SUBS_IN_TO_EXISTS)
{
if (in_subs->create_in_to_exists_cond(this))
DBUG_RETURN(1);
if (in_subs->inject_in_to_exists_cond(this))
DBUG_RETURN(1);
tmp_having= having;
}
else
DBUG_ASSERT(FALSE);
}
DBUG_RETURN(0);
}
@ -2828,10 +2833,14 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
}
if (conds || outer_join)
{
if (update_ref_and_keys(join->thd, keyuse_array, stat, join->tables,
conds, join->cond_equal,
~outer_join, join->select_lex, &sargables))
conds, ~outer_join, join->select_lex, &sargables))
goto error;
if (keyuse_array->elements && sort_and_filter_keyuse(keyuse_array))
goto error;
DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array););
}
join->const_table_map= no_rows_const_tables;
join->const_tables= const_count;
@ -3135,8 +3144,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
sizeof(POSITION)*join->const_tables);
join->best_read=1.0;
}
if ((join->in_to_exists_where || join->in_to_exists_having)
&& join->choose_subquery_plan())
if (join->choose_subquery_plan(all_table_map & ~join->const_table_map))
goto error;
/* Generate an execution plan from the found optimal join order. */
@ -4089,11 +4097,10 @@ static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
static bool
update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
uint tables, COND *cond, COND_EQUAL *cond_equal,
table_map normal_tables, SELECT_LEX *select_lex,
SARGABLE_PARAM **sargables)
uint tables, COND *cond, table_map normal_tables,
SELECT_LEX *select_lex, SARGABLE_PARAM **sargables)
{
uint and_level,i,found_eq_constant;
uint and_level,i;
KEY_FIELD *key_fields, *end, *field;
uint sz;
uint m= max(select_lex->max_equal_elems,1);
@ -4189,67 +4196,76 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
return TRUE;
}
/*
Sort the array of possible keys and remove the following key parts:
- ref if there is a keypart which is a ref and a const.
(e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
then we skip the key part corresponding to b=t2.d)
- keyparts without previous keyparts
(e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
used in the query, we drop the partial key parts from consideration).
Special treatment for ft-keys.
*/
if (keyuse->elements)
{
KEYUSE key_end,*prev,*save_pos,*use;
my_qsort(keyuse->buffer,keyuse->elements,sizeof(KEYUSE),
(qsort_cmp) sort_keyuse);
bzero((char*) &key_end,sizeof(key_end)); /* Add for easy testing */
if (insert_dynamic(keyuse,(uchar*) &key_end))
return TRUE;
use=save_pos=dynamic_element(keyuse,0,KEYUSE*);
prev= &key_end;
found_eq_constant=0;
for (i=0 ; i < keyuse->elements-1 ; i++,use++)
{
if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
use->table->const_key_parts[use->key]|= use->keypart_map;
if (use->keypart != FT_KEYPART)
{
if (use->key == prev->key && use->table == prev->table)
{
if (prev->keypart+1 < use->keypart ||
(prev->keypart == use->keypart && found_eq_constant))
continue; /* remove */
}
else if (use->keypart != 0) // First found must be 0
continue;
}
#ifdef HAVE_valgrind
/* Valgrind complains about overlapped memcpy when save_pos==use. */
if (save_pos != use)
#endif
*save_pos= *use;
prev=use;
found_eq_constant= !use->used_tables;
/* Save ptr to first use */
if (!use->table->reginfo.join_tab->keyuse)
use->table->reginfo.join_tab->keyuse=save_pos;
use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
save_pos++;
}
i=(uint) (save_pos-(KEYUSE*) keyuse->buffer);
VOID(set_dynamic(keyuse,(uchar*) &key_end,i));
keyuse->elements=i;
}
DBUG_EXECUTE("opt", print_keyuse_array(keyuse););
return FALSE;
}
/**
Sort the array of possible keys and remove the following key parts:
- ref if there is a keypart which is a ref and a const.
(e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
then we skip the key part corresponding to b=t2.d)
- keyparts without previous keyparts
(e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
used in the query, we drop the partial key parts from consideration).
Special treatment for ft-keys.
*/
static bool sort_and_filter_keyuse(DYNAMIC_ARRAY *keyuse)
{
KEYUSE key_end, *prev, *save_pos, *use;
uint found_eq_constant, i;
DBUG_ASSERT(keyuse->elements);
my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE),
(qsort_cmp) sort_keyuse);
bzero((char*) &key_end, sizeof(key_end)); /* Add for easy testing */
if (insert_dynamic(keyuse, (uchar*) &key_end))
return TRUE;
use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
prev= &key_end;
found_eq_constant= 0;
for (i=0 ; i < keyuse->elements-1 ; i++,use++)
{
if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
use->table->const_key_parts[use->key]|= use->keypart_map;
if (use->keypart != FT_KEYPART)
{
if (use->key == prev->key && use->table == prev->table)
{
if (prev->keypart+1 < use->keypart ||
(prev->keypart == use->keypart && found_eq_constant))
continue; /* remove */
}
else if (use->keypart != 0) // First found must be 0
continue;
}
#ifdef HAVE_valgrind
/* Valgrind complains about overlapped memcpy when save_pos==use. */
if (save_pos != use)
#endif
*save_pos= *use;
prev= use;
found_eq_constant= !use->used_tables;
/* Save ptr to first use */
if (!use->table->reginfo.join_tab->keyuse)
use->table->reginfo.join_tab->keyuse=save_pos;
use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
save_pos++;
}
i= (uint) (save_pos-(KEYUSE*) keyuse->buffer);
VOID(set_dynamic(keyuse,(uchar*) &key_end,i));
keyuse->elements= i;
return FALSE;
}
/**
Update some values in keyuse for faster choose_plan() loop.
*/
@ -19265,6 +19281,131 @@ bool JOIN::change_result(select_result *res)
DBUG_RETURN(FALSE);
}
/**
Save the original query execution plan so that the caller can revert to it
if needed.
*/
int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse,
POSITION *save_best_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys)
{
if (keyuse.elements)
{
DYNAMIC_ARRAY tmp_keyuse;
if (my_init_dynamic_array(save_keyuse, sizeof(KEYUSE), 20, 64))
return 1;
/* Swap the current and the backup keyuse arrays. */
tmp_keyuse= keyuse;
keyuse= (*save_keyuse);
(*save_keyuse)= tmp_keyuse;
for (uint i= 0; i < tables; i++)
{
save_join_tab_keyuse[i]= join_tab[i].keyuse;
join_tab[i].keyuse= NULL;
save_join_tab_checked_keys[i]= join_tab[i].checked_keys;
join_tab[i].checked_keys.clear_all();
}
}
memcpy((uchar*) save_best_positions, (uchar*) best_positions,
sizeof(POSITION) * (tables + 1));
memset(best_positions, 0, sizeof(POSITION) * (tables + 1));
return 0;
}
/**
Restore the query plan saved before reoptimization with additional
conditions.
*/
void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse,
POSITION *save_best_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys)
{
if (save_keyuse->elements)
{
DYNAMIC_ARRAY tmp_keyuse;
tmp_keyuse= keyuse;
keyuse= (*save_keyuse);
(*save_keyuse)= tmp_keyuse;
delete_dynamic(save_keyuse);
for (uint i= 0; i < tables; i++)
{
join_tab[i].keyuse= save_join_tab_keyuse[i];
join_tab[i].checked_keys= save_join_tab_checked_keys[i];
}
}
memcpy((uchar*) best_positions, (uchar*) save_best_positions,
sizeof(POSITION) * (tables + 1));
}
/**
Reoptimize a query plan taking into account an additional conjunct to the
WHERE clause.
*/
int JOIN::reoptimize(Item *added_where, table_map join_tables,
POSITION *save_best_positions)
{
DYNAMIC_ARRAY added_keyuse;
SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */
if (my_init_dynamic_array(&added_keyuse, sizeof(KEYUSE), 20, 64))
{
delete_dynamic(&added_keyuse);
return 1;
}
/* Re-run the REF optimizer to take into account the new conditions. */
if (update_ref_and_keys(thd, &added_keyuse, join_tab, tables, added_where,
~outer_join, select_lex, &sargables))
{
delete_dynamic(&added_keyuse);
return 1;
}
if (!added_keyuse.elements)
{
/* No need to optimize if no new access methods were discovered. */
if (save_best_positions)
memcpy((uchar*) best_positions, (uchar*) save_best_positions,
sizeof(POSITION) * (tables + 1));
delete_dynamic(&added_keyuse);
return 0;
}
/* Add the new access methods to the keyuse array. */
if (!keyuse.buffer &&
my_init_dynamic_array(&keyuse, sizeof(KEYUSE), 20, 64))
{
delete_dynamic(&added_keyuse);
return 1;
}
allocate_dynamic(&keyuse, keyuse.elements + added_keyuse.elements);
memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element,
added_keyuse.buffer,
(size_t) added_keyuse.elements * added_keyuse.size_of_element);
keyuse.elements+= added_keyuse.elements;
delete_dynamic(&added_keyuse);
if (sort_and_filter_keyuse(&keyuse))
return 1;
optimize_keyuse(this, &keyuse);
/* Re-run the join optimizer to compute a new query plan. */
if (choose_plan(this, join_tables))
return 1;
return 0;
}
/**
@} (end of group Query_Optimizer)
*/

View File

@ -1369,8 +1369,21 @@ inline bool sj_is_materialize_strategy(uint strategy)
class JOIN :public Sql_alloc
{
private:
JOIN(const JOIN &rhs); /**< not implemented */
JOIN& operator=(const JOIN &rhs); /**< not implemented */
protected:
/* Support for plan reoptimization with rewritten conditions. */
int reoptimize(Item *added_where, table_map join_tables,
POSITION *save_best_positions);
int save_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys);
void restore_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys);
public:
JOIN_TAB *join_tab,**best_ref;
JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs
@ -1746,7 +1759,7 @@ public:
NULL : join_tab+const_tables;
}
bool setup_subquery_caches();
bool choose_subquery_plan();
bool choose_subquery_plan(table_map join_tables);
private:
/**
TRUE if the query contains an aggregate function but has no GROUP