1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MWL#90: Subqueries: Inside-out execution for non-semijoin materialized subqueries that are AND-parts of the WHERE

- Merge into 5.3-subqueries
This commit is contained in:
Sergey Petrunya
2010-05-24 15:24:20 +04:00
41 changed files with 1269 additions and 675 deletions

View File

@@ -2,7 +2,7 @@
@file
@brief
Subquery optimization code here.
Semi-join subquery optimizations code
*/
@@ -16,7 +16,6 @@
#include <my_bit.h>
// Our own:
static
bool subquery_types_allow_materialization(Item_in_subselect *in_subs);
static bool replace_where_subcondition(JOIN *join, Item **expr,
@@ -25,6 +24,8 @@ static bool replace_where_subcondition(JOIN *join, Item **expr,
static int subq_sj_candidate_cmp(Item_in_subselect* const *el1,
Item_in_subselect* const *el2);
static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred);
static bool convert_subq_to_jtbm(JOIN *parent_join,
Item_in_subselect *subq_pred, bool *remove);
static TABLE_LIST *alloc_join_nest(THD *thd);
static
void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist);
@@ -50,17 +51,22 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where);
/*
Check if we need JOIN::prepare()-phase subquery rewrites and if yes, do them
SYNOPSIS
check_and_do_in_subquery_rewrites()
join Subquery's join
DESCRIPTION
Check if we need to do
- subquery->semi-join rewrite
- subquery -> mergeable semi-join rewrite
- if the subquery can be handled with materialization
- 'substitution' rewrite for table-less subqueries like "(select 1)"
and mark appropriately
- IN->EXISTS rewrite
and, depending on the rewrite, either do it, or record it to be done at a
later phase.
RETURN
0 - OK
-1 - Some sort of query error
0 - OK
Other - Some sort of query error
*/
int check_and_do_in_subquery_rewrites(JOIN *join)
@@ -166,6 +172,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
(void)subquery_types_allow_materialization(in_subs);
in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
in_subs->is_flattenable_semijoin= TRUE;
/* Register the subquery for further processing in flatten_subqueries() */
select_lex->
@@ -220,10 +227,23 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
(in_subs->is_top_level_item() ||
optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) ||
optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) &&//4
!in_subs->is_correlated && // 5
in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6
!in_subs->is_correlated) // 5
{
if (in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED)
in_subs->exec_method= Item_in_subselect::MATERIALIZATION;
/*
If the subquery is an AND-part of WHERE register for being processed
with jtbm strategy
*/
if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION &&
thd->thd_marker.emb_on_expr_nest == (TABLE_LIST*)0x1)
{
in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
in_subs->is_flattenable_semijoin= FALSE;
select_lex->outer_select()->
join->sj_subselects.append(thd->mem_root, in_subs);
}
}
Item_subselect::trans_res trans_res;
@@ -338,6 +358,69 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
}
/*
  Finalize IN->EXISTS conversion in case we couldn't use materialization.

  SYNOPSIS
    make_in_exists_conversion()
      thd   Thread handle
      join  Join whose WHERE (or ON expression) contains the predicate
      item  The IN subquery predicate to convert

  DESCRIPTION
    Invoke the IN->EXISTS converter (item->select_transformer()) with the
    subquery's first select installed as the current select, then replace
    the predicate's Item_in_optimizer wrapper with item->substitution in
    join->conds, or in the ON expression of item->emb_on_expr_nest.
    When the statement arena is not a conventional one, the same replacement
    is also made in prep_where / prep_on_expr.

  RETURN
    FALSE - Ok
    TRUE  - Fatal error
*/

static
bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item)
{
  DBUG_ENTER("make_in_exists_conversion");
  SELECT_LEX *subq_select= item->unit->first_select();
  JOIN *child_join= subq_select->join;

  /*
    Clear the changed/fixed flags while the transformer runs; they are set
    again below once it has succeeded.
  */
  item->changed= 0;
  item->fixed= 0;

  /* Run the transformer with the subquery's select as the current one */
  SELECT_LEX *outer_select= thd->lex->current_select;
  thd->lex->current_select= subq_select;
  Item_subselect::trans_res res= item->select_transformer(child_join);
  thd->lex->current_select= outer_select;

  if (res == Item_subselect::RES_ERROR)
    DBUG_RETURN(TRUE);

  item->changed= 1;
  item->fixed= 1;

  Item *substitute= item->substitution;
  bool do_fix_fields= !substitute->fixed;

  /*
    The Item_subselect has already been wrapped with Item_in_optimizer, so we
    should search for item->optimizer, not 'item'.
  */
  Item *replace_me= item->optimizer;
  DBUG_ASSERT(replace_me==substitute);

  /* emb_on_expr_nest == 1 is the marker for "predicate is in the WHERE" */
  Item **tree;
  if (item->emb_on_expr_nest == (TABLE_LIST*)1)
    tree= &join->conds;
  else
    tree= &(item->emb_on_expr_nest->on_expr);

  if (replace_where_subcondition(join, tree, replace_me, substitute,
                                 do_fix_fields))
    DBUG_RETURN(TRUE);
  item->substitution= NULL;

  /* Nothing more to do when a conventional statement arena is in use */
  if (thd->stmt_arena->is_conventional())
    DBUG_RETURN(FALSE);

  /* Otherwise make the same replacement in the prep_* copy of the condition */
  if (item->emb_on_expr_nest == (TABLE_LIST*)1)
    tree= &join->select_lex->prep_where;
  else
    tree= &(item->emb_on_expr_nest->prep_on_expr);

  if (replace_where_subcondition(join, tree, replace_me, substitute,
                                 FALSE))
    DBUG_RETURN(TRUE);

  DBUG_RETURN(FALSE);
}
/*
Convert semi-join subquery predicates into semi-join join nests
@@ -445,25 +528,41 @@ bool convert_join_subqueries_to_semijoins(JOIN *join)
// #tables-in-parent-query + #tables-in-subquery < MAX_TABLES
/* Replace all subqueries to be flattened with Item_int(1) */
arena= thd->activate_stmt_arena_if_needed(&backup);
for (in_subq= join->sj_subselects.front();
in_subq != in_subq_end &&
join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES;
in_subq++)
{
Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)?
&join->conds : &((*in_subq)->emb_on_expr_nest->on_expr);
if (replace_where_subcondition(join, tree, *in_subq, new Item_int(1),
FALSE))
DBUG_RETURN(TRUE); /* purecov: inspected */
}
for (in_subq= join->sj_subselects.front();
in_subq != in_subq_end &&
join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES;
in_subq++)
{
if (convert_subq_to_sj(join, *in_subq))
DBUG_RETURN(TRUE);
bool remove_item= TRUE;
if ((*in_subq)->is_flattenable_semijoin)
{
if (convert_subq_to_sj(join, *in_subq))
DBUG_RETURN(TRUE);
}
else
{
if (convert_subq_to_jtbm(join, *in_subq, &remove_item))
DBUG_RETURN(TRUE);
}
if (remove_item)
{
Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)?
&join->conds : &((*in_subq)->emb_on_expr_nest->on_expr);
Item *replace_me= *in_subq;
/*
JTBM: the subquery was already mapped with Item_in_optimizer, so we
should search for that, not for original Item_in_subselect.
TODO: what about delaying that rewrite until here?
*/
if (!(*in_subq)->is_flattenable_semijoin)
{
replace_me= (*in_subq)->optimizer;
}
if (replace_where_subcondition(join, tree, replace_me, new Item_int(1),
FALSE))
DBUG_RETURN(TRUE); /* purecov: inspected */
}
}
skip_conversion:
/*
@@ -494,7 +593,19 @@ skip_conversion:
bool do_fix_fields= !(*in_subq)->substitution->fixed;
Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)?
&join->conds : &((*in_subq)->emb_on_expr_nest->on_expr);
if (replace_where_subcondition(join, tree, *in_subq, substitute,
Item *replace_me= *in_subq;
/*
JTBM: the subquery was already mapped with Item_in_optimizer, so we
should search for that, not for original Item_in_subselect.
TODO: what about delaying that rewrite until here?
*/
if (!(*in_subq)->is_flattenable_semijoin)
{
replace_me= (*in_subq)->optimizer;
}
if (replace_where_subcondition(join, tree, replace_me, substitute,
do_fix_fields))
DBUG_RETURN(TRUE);
(*in_subq)->substitution= NULL;
@@ -505,7 +616,7 @@ skip_conversion:
&join->select_lex->prep_where :
&((*in_subq)->emb_on_expr_nest->prep_on_expr);
if (replace_where_subcondition(join, tree, *in_subq, substitute,
if (replace_where_subcondition(join, tree, replace_me, substitute,
FALSE))
DBUG_RETURN(TRUE);
}
@@ -517,6 +628,61 @@ skip_conversion:
DBUG_RETURN(FALSE);
}
/*
  Get #output_rows and scan_time estimates for a "delayed" table.

  SYNOPSIS
    get_delayed_table_estimates()
      table         IN  Table to get estimates for
      out_rows      OUT E(#rows in the table)
      scan_time     OUT E(scan_time).
      startup_cost  OUT cost to populate the table.

  DESCRIPTION
    Get #output_rows and scan_time estimates for a "delayed" table. By
    "delayed" here we mean that the table is filled at the start of query
    execution. This means that the optimizer can't use table statistics to
    get a #rows estimate for it, it has to call this function instead.

    This function is expected to take different actions depending on the
    nature of the table. At the moment there is only one kind of delayed
    table, non-flattenable semi-joins.

    The subquery predicate attached to the table is optimized first; the
    estimates are then derived from the best_positions[] of its
    materialization join.
*/

void get_delayed_table_estimates(TABLE *table,
                                 ha_rows *out_rows,
                                 double *scan_time,
                                 double *startup_cost)
{
  Item_in_subselect *subq_pred= table->pos_in_table_list->jtbm_subselect;
  subq_pred->optimize();

  DBUG_ASSERT(subq_pred->engine->engine_type() ==
              subselect_engine::HASH_SJ_ENGINE);

  subselect_hash_sj_engine *engine=
    (subselect_hash_sj_engine*)subq_pred->engine;
  JOIN *subq_join= engine->materialize_join;

  /*
    #rows is the product of records_read over the non-const tables of the
    materialization join, its cost is the sum of their read_time values.
  */
  double row_count= 1;
  double exec_cost= 0.0;
  uint idx= subq_join->const_tables;
  while (idx < subq_join->tables)
  {
    row_count*= subq_join->best_positions[idx].records_read;
    exec_cost+= subq_join->best_positions[idx].read_time;
    idx++;
  }

  *out_rows= (ha_rows) row_count;
  *startup_cost= exec_cost;

  /* Cost of scanning the temptable: do like in handler::read_time */
  double tmp_table_bytes= row_count * engine->tmp_table->s->reclength;
  *scan_time= tmp_table_bytes/IO_SIZE + 2;
}
/**
@brief Replaces an expression destructively inside the expression tree of
the WHERE clause.
@@ -534,6 +700,7 @@ skip_conversion:
@return <code>true</code> if there was an error, <code>false</code> if
successful.
*/
static bool replace_where_subcondition(JOIN *join, Item **expr,
Item *old_cond, Item *new_cond,
bool do_fix_fields)
@@ -769,8 +936,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
/* 3. Remove the original subquery predicate from the WHERE/ON */
// The subqueries were replaced for Item_int(1) earlier
subq_pred->exec_method=
Item_in_subselect::SEMI_JOIN; // for subsequent executions
subq_pred->exec_method= Item_in_subselect::SEMI_JOIN; // for subsequent executions
/*TODO: also reset the 'with_subselect' there. */
/* n. Adjust the parent_join->tables counter */
@@ -887,6 +1053,118 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
DBUG_RETURN(FALSE);
}
/*
  Convert subquery predicate into non-mergeable semi-join nest.

  SYNOPSIS
    convert_subq_to_jtbm()
      parent_join  Join of the select that contains the predicate
      subq_pred    The IN subquery predicate to convert
      remove_item  OUT TRUE  - the predicate was converted into a jtbm table
                       FALSE - the hash semi-join engine could not be used,
                               IN->EXISTS conversion was made instead

  DESCRIPTION
    Set up the subquery's execution engine. If the result is not a
    HASH_SJ_ENGINE, fall back to make_in_exists_conversion(). Otherwise:
     - allocate a TABLE_LIST for the materialized subquery, name it
       "SUBQUERY#<select_number>" and add it to the parent's top_join_list
       and to its next_leaf / next_local chains;
     - make it point to the engine's temporary table and give that table the
       next free table number/bit in parent_join;
     - AND the engine's semi_join_conds into the parent's WHERE.

  TODO:
    why does this do IN-EXISTS conversion? Can't we unify it with mergeable
    semi-joins? currently, convert_subq_to_sj() cannot fail to convert (unless
    fatal errors)

  RETURN
    FALSE - Ok
    TRUE  - Fatal error
*/

static bool convert_subq_to_jtbm(JOIN *parent_join,
                                 Item_in_subselect *subq_pred,
                                 bool *remove_item)
{
  SELECT_LEX *parent_lex= parent_join->select_lex;
  List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list;
  TABLE_LIST *emb_tbl_nest= NULL; // will change when we learn to handle outer joins
  TABLE_LIST *tl;
  DBUG_ENTER("convert_subq_to_jtbm");

  if (subq_pred->setup_engine(TRUE))
    DBUG_RETURN(TRUE);

  if (subq_pred->engine->engine_type() != subselect_engine::HASH_SJ_ENGINE)
  {
    *remove_item= FALSE;
    /* A failed IN->EXISTS conversion is a fatal error: report it */
    if (make_in_exists_conversion(parent_join->thd, parent_join, subq_pred))
      DBUG_RETURN(TRUE);
    DBUG_RETURN(FALSE);
  }

  *remove_item= TRUE;

  TABLE_LIST *jtbm;
  char *tbl_alias;
  const char alias_mask[]="SUBQUERY#%d";
  /* The alias buffer has room for the mask plus 5 extra characters */
  if (!(tbl_alias= (char*)parent_join->thd->calloc(sizeof(alias_mask)+5)) ||
      !(jtbm= alloc_join_nest(parent_join->thd))) //todo: this is not a join nest!
  {
    DBUG_RETURN(TRUE);
  }

  jtbm->join_list= emb_join_list;
  jtbm->embedding= emb_tbl_nest;
  jtbm->jtbm_subselect= subq_pred;
  jtbm->nested_join= NULL;

  /* Nests do not participate in those 'chains', so: */
  /* jtbm->next_leaf= jtbm->next_local= jtbm->next_global == NULL*/
  emb_join_list->push_back(jtbm);

  /*
    Inject the jtbm table into TABLE_LIST::next_leaf list, so that
    make_join_statistics() and co. can find it.
  */
  for (tl= parent_lex->leaf_tables; tl->next_leaf; tl= tl->next_leaf) ;
  tl->next_leaf= jtbm;

  /*
    Same as above for TABLE_LIST::next_local chain
    (a theory: a next_local chain always starts with ::leaf_tables
     because view's tables are inserted after the view)
  */
  for (tl= parent_lex->leaf_tables; tl->next_local; tl= tl->next_local) ;
  tl->next_local= jtbm;

  /* A theory: no need to re-connect the next_global chain */

  subselect_hash_sj_engine *hash_sj_engine=
    ((subselect_hash_sj_engine*)subq_pred->engine);
  jtbm->table= hash_sj_engine->tmp_table;

  /* The temptable becomes the last table of the parent join */
  jtbm->table->tablenr= parent_join->tables;
  jtbm->table->map= table_map(1) << (parent_join->tables);
  parent_join->tables++;

  Item *conds= hash_sj_engine->semi_join_conds;
  conds->fix_after_pullout(parent_lex, &conds);

  DBUG_EXECUTE("where", print_where(conds,"SJ-EXPR", QT_ORDINARY););

  my_snprintf(tbl_alias, sizeof(alias_mask)+5, alias_mask,
              hash_sj_engine->materialize_join->select_lex->select_number);
  jtbm->alias= tbl_alias;

  /* Inject sj_on_expr into the parent's WHERE or ON */
  if (emb_tbl_nest)
  {
    DBUG_ASSERT(0);
    /*emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr,
                                     sj_nest->sj_on_expr);
    emb_tbl_nest->on_expr->fix_fields(parent_join->thd, &emb_tbl_nest->on_expr);
    */
  }
  else
  {
    /* Inject into the WHERE */
    parent_join->conds= and_items(parent_join->conds, conds);
    bool fix_failed= parent_join->conds->fix_fields(parent_join->thd,
                                                    &parent_join->conds);
    /* Keep select_lex->where in sync with join->conds even on failure */
    parent_join->select_lex->where= parent_join->conds;
    if (fix_failed)
      DBUG_RETURN(TRUE);
  }

  /* Don't unlink the child subselect, as the subquery will be used. */

  DBUG_RETURN(FALSE);
}
static TABLE_LIST *alloc_join_nest(THD *thd)
{
TABLE_LIST *tbl;
@@ -1245,6 +1523,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
DBUG_RETURN(FALSE);
}
/*
Get estimated record length for semi-join materialization temptable
@@ -1301,7 +1580,7 @@ static uint get_tmp_table_rec_length(List<Item> &items)
return len;
}
//psergey-todo: is the below a kind of table elimination??
/*
Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate
@@ -1318,6 +1597,8 @@ static uint get_tmp_table_rec_length(List<Item> &items)
Check again if it is feasible to factor common parts with constant table
search
Also check if it's feasible to factor common parts with table elimination
RETURN
TRUE - There exists an eq_ref(outer-tables) candidate
FALSE - Otherwise
@@ -1368,6 +1649,7 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables)
return FALSE;
}
/*
Do semi-join optimization step after we've added a new tab to join prefix
@@ -3504,3 +3786,26 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where)
}
/*
  Materialize the subquery behind a jtbm table if that wasn't done yet

  SYNOPSIS
    do_jtbm_materialization_if_needed()
      tab  Join tab to check

  DESCRIPTION
    Nothing is done unless tab's table has a TABLE_LIST with jtbm_subselect
    set. Otherwise the materialization join of the subquery's hash semi-join
    engine is executed, unless the engine is already marked as materialized.
    The engine is marked as materialized right after the execution, before
    the outcome is checked.

  RETURN
    FALSE - Ok (this includes the "nothing to do" cases)
    TRUE  - The materialization join reported an error, or the thread has a
            fatal error
*/

bool do_jtbm_materialization_if_needed(JOIN_TAB *tab)
{
  TABLE_LIST *tbl= tab->table->pos_in_table_list;
  if (!tbl)
    return FALSE;

  Item_in_subselect *in_subs= tbl->jtbm_subselect;
  if (!in_subs)
    return FALSE;

  DBUG_ASSERT(in_subs->engine->engine_type() ==
              subselect_engine::HASH_SJ_ENGINE);
  subselect_hash_sj_engine *engine=
    (subselect_hash_sj_engine*)in_subs->engine;

  if (engine->is_materialized)
    return FALSE;

  engine->materialize_join->exec();
  engine->is_materialized= TRUE;

  if (engine->materialize_join->error || tab->join->thd->is_fatal_error)
    return TRUE;

  return FALSE;
}