mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
Semi-join optimizations code cleanup:
- Break down POSITION/advance_sj_state() into four classes representing potential semi-join strategies. - Treat all strategies uniformly (before, DuplicateWeedout was special as it was the catch-all strategy. Now, we're still relying on it to be the catch-all, but are able to function,e.g. with firstmatch=on,duplicate_weedout=off. - Update test results (checked)
This commit is contained in:
@ -323,8 +323,8 @@ WHERE Language='English' AND Percentage > 10 AND
|
||||
t2.Population > 100000);
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 PRIMARY t1 range Population,Country Population 4 NULL 1 Using index condition; Rowid-ordered scan; Start temporary
|
||||
1 PRIMARY t2 eq_ref PRIMARY,Population PRIMARY 3 test.t1.Country 1 Using where
|
||||
1 PRIMARY t3 eq_ref PRIMARY,Percentage PRIMARY 33 test.t1.Country,const 1 Using index condition; Using where; End temporary
|
||||
1 PRIMARY t2 eq_ref PRIMARY,Population PRIMARY 3 test.t1.Country 1 Using where; End temporary
|
||||
1 PRIMARY t3 eq_ref PRIMARY,Percentage PRIMARY 33 test.t1.Country,const 1 Using index condition; Using where
|
||||
set optimizer_switch=@bug35674_save_optimizer_switch;
|
||||
DROP TABLE t1,t2,t3;
|
||||
CREATE TABLE t1 (
|
||||
|
@ -332,8 +332,8 @@ WHERE Language='English' AND Percentage > 10 AND
|
||||
t2.Population > 100000);
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 PRIMARY t1 range Population,Country Population 4 NULL 1 Using index condition; Rowid-ordered scan; Start temporary
|
||||
1 PRIMARY t2 eq_ref PRIMARY,Population PRIMARY 3 test.t1.Country 1 Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
|
||||
1 PRIMARY t3 eq_ref PRIMARY,Percentage PRIMARY 33 test.t1.Country,const 1 Using index condition; Using where; End temporary; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
|
||||
1 PRIMARY t2 eq_ref PRIMARY,Population PRIMARY 3 test.t1.Country 1 Using where; End temporary; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
|
||||
1 PRIMARY t3 eq_ref PRIMARY,Percentage PRIMARY 33 test.t1.Country,const 1 Using index condition; Using where; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
|
||||
set optimizer_switch=@bug35674_save_optimizer_switch;
|
||||
DROP TABLE t1,t2,t3;
|
||||
CREATE TABLE t1 (
|
||||
|
@ -2168,70 +2168,414 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables)
|
||||
See setup_semijoin_dups_elimination() for a description of what kinds of
|
||||
join prefixes each strategy can handle.
|
||||
*/
|
||||
bool is_multiple_semi_joins(POSITION *prefix, uint idx, table_map inner_tables)
|
||||
{
|
||||
for (int i= (int)idx; i >= 0; i--)
|
||||
{
|
||||
TABLE_LIST *emb_sj_nest;
|
||||
if ((emb_sj_nest= prefix[i].table->emb_sj_nest))
|
||||
{
|
||||
if (inner_tables & emb_sj_nest->sj_inner_tables)
|
||||
return !test(inner_tables == emb_sj_nest->sj_inner_tables);
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
void advance_sj_state(JOIN *join, table_map remaining_tables,
|
||||
const JOIN_TAB *new_join_tab, uint idx,
|
||||
|
||||
void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx,
|
||||
double *current_record_count, double *current_read_time,
|
||||
POSITION *loose_scan_pos)
|
||||
{
|
||||
TABLE_LIST *emb_sj_nest;
|
||||
POSITION *pos= join->positions + idx;
|
||||
remaining_tables &= ~new_join_tab->table->map;
|
||||
bool disable_jbuf= join->thd->variables.join_cache_level == 0;
|
||||
const JOIN_TAB *new_join_tab= pos->table;
|
||||
Semi_join_strategy_picker *pickers[]=
|
||||
{
|
||||
&pos->firstmatch_picker,
|
||||
&pos->loosescan_picker,
|
||||
&pos->sjmat_picker,
|
||||
&pos->dups_weedout_picker,
|
||||
NULL,
|
||||
};
|
||||
|
||||
pos->prefix_cost.convert_from_cost(*current_read_time);
|
||||
pos->prefix_record_count= *current_record_count;
|
||||
pos->sj_strategy= SJ_OPT_NONE;
|
||||
|
||||
pos->prefix_dups_producing_tables= join->cur_dups_producing_tables;
|
||||
|
||||
/* We're performing optimization inside SJ-Materialization nest */
|
||||
if (join->emb_sjm_nest)
|
||||
{
|
||||
pos->invalidate_firstmatch_prefix();
|
||||
pos->first_loosescan_table= MAX_TABLES;
|
||||
pos->dupsweedout_tables= 0;
|
||||
pos->sjm_scan_need_tables= 0;
|
||||
/*
|
||||
We're performing optimization inside SJ-Materialization nest:
|
||||
- there are no other semi-joins inside semi-join nests
|
||||
- attempts to build semi-join strategies here will confuse
|
||||
the optimizer, so bail out.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
/* Initialize the state or copy it from prev. tables */
|
||||
/*
|
||||
Update join->cur_sj_inner_tables (Used by FirstMatch in this function and
|
||||
LooseScan detector in best_access_path)
|
||||
*/
|
||||
remaining_tables &= ~new_join_tab->table->map;
|
||||
pos->prefix_dups_producing_tables= join->cur_dups_producing_tables;
|
||||
TABLE_LIST *emb_sj_nest;
|
||||
if ((emb_sj_nest= new_join_tab->emb_sj_nest))
|
||||
{
|
||||
/// join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
|
||||
join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables;
|
||||
|
||||
/* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
|
||||
/// if (!(remaining_tables &
|
||||
/// emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
|
||||
/// join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
|
||||
}
|
||||
|
||||
Semi_join_strategy_picker **strategy;
|
||||
if (idx == join->const_tables)
|
||||
{
|
||||
pos->invalidate_firstmatch_prefix();
|
||||
pos->first_loosescan_table= MAX_TABLES;
|
||||
pos->dupsweedout_tables= 0;
|
||||
pos->sjm_scan_need_tables= 0;
|
||||
LINT_INIT(pos->sjm_scan_last_inner);
|
||||
/* First table, initialize pickers */
|
||||
for (strategy= pickers; *strategy != NULL; strategy++)
|
||||
(*strategy)->set_empty();
|
||||
pos->inner_tables_handled_with_other_sjs= 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// FirstMatch
|
||||
pos->first_firstmatch_table=
|
||||
(pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH) ?
|
||||
MAX_TABLES : pos[-1].first_firstmatch_table;
|
||||
pos->first_firstmatch_rtbl= pos[-1].first_firstmatch_rtbl;
|
||||
pos->firstmatch_need_tables= pos[-1].firstmatch_need_tables;
|
||||
|
||||
// LooseScan
|
||||
pos->first_loosescan_table=
|
||||
(pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN) ?
|
||||
MAX_TABLES : pos[-1].first_loosescan_table;
|
||||
pos->loosescan_need_tables= pos[-1].loosescan_need_tables;
|
||||
|
||||
// SJ-Materialization Scan
|
||||
pos->sjm_scan_need_tables=
|
||||
(pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN) ?
|
||||
0 : pos[-1].sjm_scan_need_tables;
|
||||
pos->sjm_scan_last_inner= pos[-1].sjm_scan_last_inner;
|
||||
|
||||
// Duplicate Weedout
|
||||
pos->dupsweedout_tables= pos[-1].dupsweedout_tables;
|
||||
pos->first_dupsweedout_table= pos[-1].first_dupsweedout_table;
|
||||
for (strategy= pickers; *strategy != NULL; strategy++)
|
||||
{
|
||||
(*strategy)->set_from_prev(pos - 1);
|
||||
}
|
||||
pos->inner_tables_handled_with_other_sjs=
|
||||
pos[-1].inner_tables_handled_with_other_sjs;
|
||||
}
|
||||
|
||||
table_map handled_by_fm_or_ls= 0;
|
||||
/* FirstMatch Strategy */
|
||||
pos->prefix_cost.convert_from_cost(*current_read_time);
|
||||
pos->prefix_record_count= *current_record_count;
|
||||
|
||||
{
|
||||
pos->sj_strategy= SJ_OPT_NONE;
|
||||
|
||||
for (strategy= pickers; *strategy != NULL; strategy++)
|
||||
{
|
||||
table_map handled_fanout;
|
||||
sj_strategy_enum sj_strategy;
|
||||
double rec_count= *current_record_count;
|
||||
double read_time= *current_read_time;
|
||||
if ((*strategy)->check_qep(join, idx, remaining_tables,
|
||||
new_join_tab,
|
||||
&rec_count,
|
||||
&read_time,
|
||||
&handled_fanout,
|
||||
&sj_strategy,
|
||||
loose_scan_pos))
|
||||
{
|
||||
/*
|
||||
It's possible to use the strategy. Use it, if
|
||||
- it removes semi-join fanout that was not removed before
|
||||
- using it is cheaper than using something else,
|
||||
and {if some other strategy has removed fanout
|
||||
that this strategy is trying to remove, then it
|
||||
did remove the fanout only for one semi-join}
|
||||
This is to avoid a situation when
|
||||
1. strategy X removes fanout for semijoin X,Y
|
||||
2. using strategy Z is cheaper, but it only removes
|
||||
fanout from semijoin X.
|
||||
3. We have no clue what to do about fanount of semi-join Y.
|
||||
*/
|
||||
if ((join->cur_dups_producing_tables & handled_fanout) ||
|
||||
(read_time < *current_read_time &&
|
||||
!(handled_fanout & pos->inner_tables_handled_with_other_sjs)))
|
||||
{
|
||||
/* Mark strategy as used */
|
||||
(*strategy)->mark_used();
|
||||
pos->sj_strategy= sj_strategy;
|
||||
*current_read_time= read_time;
|
||||
*current_record_count= rec_count;
|
||||
join->cur_dups_producing_tables &= ~handled_fanout;
|
||||
//TODO: update bitmap of semi-joins that were handled together with
|
||||
// others.
|
||||
if (is_multiple_semi_joins(join->positions, idx, handled_fanout))
|
||||
pos->inner_tables_handled_with_other_sjs |= handled_fanout;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We decided not to apply the strategy. */
|
||||
(*strategy)->set_empty();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((emb_sj_nest= new_join_tab->emb_sj_nest))
|
||||
{
|
||||
join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
|
||||
|
||||
/* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
|
||||
if (!(remaining_tables &
|
||||
emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
|
||||
join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
|
||||
}
|
||||
|
||||
pos->prefix_cost.convert_from_cost(*current_read_time);
|
||||
pos->prefix_record_count= *current_record_count;
|
||||
}
|
||||
|
||||
|
||||
void Sj_materialization_picker::set_from_prev(struct st_position *prev)
|
||||
{
|
||||
if (prev->sjmat_picker.is_used)
|
||||
set_empty();
|
||||
else
|
||||
{
|
||||
sjm_scan_need_tables= prev->sjmat_picker.sjm_scan_need_tables;
|
||||
sjm_scan_last_inner= prev->sjmat_picker.sjm_scan_last_inner;
|
||||
}
|
||||
is_used= FALSE;
|
||||
}
|
||||
|
||||
|
||||
bool Sj_materialization_picker::check_qep(JOIN *join,
|
||||
uint idx,
|
||||
table_map remaining_tables,
|
||||
const JOIN_TAB *new_join_tab,
|
||||
double *record_count,
|
||||
double *read_time,
|
||||
table_map *handled_fanout,
|
||||
sj_strategy_enum *strategy,
|
||||
POSITION *loose_scan_pos)
|
||||
{
|
||||
bool sjm_scan;
|
||||
SJ_MATERIALIZATION_INFO *mat_info;
|
||||
if ((mat_info= at_sjmat_pos(join, remaining_tables,
|
||||
new_join_tab, idx, &sjm_scan)))
|
||||
{
|
||||
if (sjm_scan)
|
||||
{
|
||||
/*
|
||||
We can't yet evaluate this option yet. This is because we can't
|
||||
accout for fanout of sj-inner tables yet:
|
||||
|
||||
ntX SJM-SCAN(it1 ... itN) | ot1 ... otN |
|
||||
^(1) ^(2)
|
||||
|
||||
we're now at position (1). SJM temptable in general has multiple
|
||||
records, so at point (1) we'll get the fanout from sj-inner tables (ie
|
||||
there will be multiple record combinations).
|
||||
|
||||
The final join result will not contain any semi-join produced
|
||||
fanout, i.e. tables within SJM-SCAN(...) will not contribute to
|
||||
the cardinality of the join output. Extra fanout produced by
|
||||
SJM-SCAN(...) will be 'absorbed' into fanout produced by ot1 ... otN.
|
||||
|
||||
The simple way to model this is to remove SJM-SCAN(...) fanout once
|
||||
we reach the point #2.
|
||||
*/
|
||||
sjm_scan_need_tables=
|
||||
new_join_tab->emb_sj_nest->sj_inner_tables |
|
||||
new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
|
||||
new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
|
||||
sjm_scan_last_inner= idx;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* This is SJ-Materialization with lookups */
|
||||
COST_VECT prefix_cost;
|
||||
signed int first_tab= (int)idx - mat_info->tables;
|
||||
double prefix_rec_count;
|
||||
if (first_tab < (int)join->const_tables)
|
||||
{
|
||||
prefix_cost.zero();
|
||||
prefix_rec_count= 1.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
prefix_cost= join->positions[first_tab].prefix_cost;
|
||||
prefix_rec_count= join->positions[first_tab].prefix_record_count;
|
||||
}
|
||||
|
||||
double mat_read_time= prefix_cost.total_cost();
|
||||
mat_read_time += mat_info->materialization_cost.total_cost() +
|
||||
prefix_rec_count * mat_info->lookup_cost.total_cost();
|
||||
|
||||
/*
|
||||
NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
|
||||
elements to join->positions as that makes it hard to return things
|
||||
back when making one step back in join optimization. That's done
|
||||
after the QEP has been chosen.
|
||||
*/
|
||||
*read_time= mat_read_time;
|
||||
*record_count= prefix_rec_count;
|
||||
*handled_fanout= new_join_tab->emb_sj_nest->sj_inner_tables;
|
||||
*strategy= SJ_OPT_MATERIALIZE;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/* 4.A SJM-Scan second phase check */
|
||||
if (sjm_scan_need_tables && /* Have SJM-Scan prefix */
|
||||
!(sjm_scan_need_tables & remaining_tables))
|
||||
{
|
||||
TABLE_LIST *mat_nest=
|
||||
join->positions[sjm_scan_last_inner].table->emb_sj_nest;
|
||||
SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info;
|
||||
|
||||
double prefix_cost;
|
||||
double prefix_rec_count;
|
||||
int first_tab= sjm_scan_last_inner + 1 - mat_info->tables;
|
||||
/* Get the prefix cost */
|
||||
if (first_tab == (int)join->const_tables)
|
||||
{
|
||||
prefix_rec_count= 1.0;
|
||||
prefix_cost= 0.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
prefix_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
|
||||
prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
|
||||
}
|
||||
|
||||
/* Add materialization cost */
|
||||
prefix_cost += mat_info->materialization_cost.total_cost() +
|
||||
prefix_rec_count * mat_info->scan_cost.total_cost();
|
||||
prefix_rec_count *= mat_info->rows;
|
||||
|
||||
uint i;
|
||||
table_map rem_tables= remaining_tables;
|
||||
for (i= idx; i != (first_tab + mat_info->tables - 1); i--)
|
||||
rem_tables |= join->positions[i].table->table->map;
|
||||
|
||||
POSITION curpos, dummy;
|
||||
/* Need to re-run best-access-path as we prefix_rec_count has changed */
|
||||
bool disable_jbuf= (join->thd->variables.join_cache_level == 0);
|
||||
for (i= first_tab + mat_info->tables; i <= idx; i++)
|
||||
{
|
||||
best_access_path(join, join->positions[i].table, rem_tables, i,
|
||||
disable_jbuf, prefix_rec_count, &curpos, &dummy);
|
||||
prefix_rec_count *= curpos.records_read;
|
||||
prefix_cost += curpos.read_time;
|
||||
}
|
||||
|
||||
*strategy= SJ_OPT_MATERIALIZE_SCAN;
|
||||
*read_time= prefix_cost;
|
||||
*record_count= prefix_rec_count;
|
||||
*handled_fanout= mat_nest->sj_inner_tables;
|
||||
return TRUE;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
void LooseScan_picker::set_from_prev(struct st_position *prev)
|
||||
{
|
||||
if (prev->loosescan_picker.is_used)
|
||||
set_empty();
|
||||
else
|
||||
{
|
||||
first_loosescan_table= prev->loosescan_picker.first_loosescan_table;
|
||||
loosescan_need_tables= prev->loosescan_picker.loosescan_need_tables;
|
||||
}
|
||||
is_used= FALSE;
|
||||
}
|
||||
|
||||
|
||||
bool LooseScan_picker::check_qep(JOIN *join,
|
||||
uint idx,
|
||||
table_map remaining_tables,
|
||||
const JOIN_TAB *new_join_tab,
|
||||
double *record_count,
|
||||
double *read_time,
|
||||
table_map *handled_fanout,
|
||||
sj_strategy_enum *strategy,
|
||||
struct st_position *loose_scan_pos)
|
||||
{
|
||||
POSITION *first= join->positions + first_loosescan_table;
|
||||
/*
|
||||
LooseScan strategy can't handle interleaving between tables from the
|
||||
semi-join that LooseScan is handling and any other tables.
|
||||
|
||||
If we were considering LooseScan for the join prefix (1)
|
||||
and the table we're adding creates an interleaving (2)
|
||||
then
|
||||
stop considering loose scan
|
||||
*/
|
||||
if ((first_loosescan_table != MAX_TABLES) && // (1)
|
||||
(first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2)
|
||||
new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2)
|
||||
{
|
||||
first_loosescan_table= MAX_TABLES;
|
||||
}
|
||||
|
||||
/*
|
||||
If we got an option to use LooseScan for the current table, start
|
||||
considering using LooseScan strategy
|
||||
*/
|
||||
if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join)
|
||||
{
|
||||
first_loosescan_table= idx;
|
||||
loosescan_need_tables=
|
||||
new_join_tab->emb_sj_nest->sj_inner_tables |
|
||||
new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
|
||||
new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
|
||||
}
|
||||
|
||||
if ((first_loosescan_table != MAX_TABLES) &&
|
||||
!(remaining_tables & loosescan_need_tables) &&
|
||||
(new_join_tab->table->map & loosescan_need_tables))
|
||||
{
|
||||
/*
|
||||
Ok we have LooseScan plan and also have all LooseScan sj-nest's
|
||||
inner tables and outer correlated tables into the prefix.
|
||||
*/
|
||||
|
||||
first= join->positions + first_loosescan_table;
|
||||
uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables);
|
||||
/* Got a complete LooseScan range. Calculate its cost */
|
||||
/*
|
||||
The same problem as with FirstMatch - we need to save POSITIONs
|
||||
somewhere but reserving space for all cases would require too
|
||||
much space. We will re-calculate POSITION structures later on.
|
||||
*/
|
||||
bool disable_jbuf= (join->thd->variables.join_cache_level == 0);
|
||||
optimize_wo_join_buffering(join, first_loosescan_table, idx,
|
||||
remaining_tables,
|
||||
TRUE, //first_alt
|
||||
disable_jbuf ? join->table_count :
|
||||
first_loosescan_table + n_tables,
|
||||
record_count,
|
||||
read_time);
|
||||
/*
|
||||
We don't yet have any other strategies that could handle this
|
||||
semi-join nest (the other options are Duplicate Elimination or
|
||||
Materialization, which need at least the same set of tables in
|
||||
the join prefix to be considered) so unconditionally pick the
|
||||
LooseScan.
|
||||
*/
|
||||
*strategy= SJ_OPT_LOOSE_SCAN;
|
||||
*handled_fanout= first->table->emb_sj_nest->sj_inner_tables;
|
||||
return TRUE;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
void Firstmatch_picker::set_from_prev(struct st_position *prev)
|
||||
{
|
||||
if (prev->firstmatch_picker.is_used)
|
||||
invalidate_firstmatch_prefix();
|
||||
else
|
||||
{
|
||||
first_firstmatch_table= prev->firstmatch_picker.first_firstmatch_table;
|
||||
first_firstmatch_rtbl= prev->firstmatch_picker.first_firstmatch_rtbl;
|
||||
firstmatch_need_tables= prev->firstmatch_picker.firstmatch_need_tables;
|
||||
}
|
||||
is_used= FALSE;
|
||||
}
|
||||
|
||||
bool Firstmatch_picker::check_qep(JOIN *join,
|
||||
uint idx,
|
||||
table_map remaining_tables,
|
||||
const JOIN_TAB *new_join_tab,
|
||||
double *record_count,
|
||||
double *read_time,
|
||||
table_map *handled_fanout,
|
||||
sj_strategy_enum *strategy,
|
||||
POSITION *loose_scan_pos)
|
||||
{
|
||||
if (new_join_tab->emb_sj_nest &&
|
||||
optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) &&
|
||||
!join->outer_join)
|
||||
@ -2259,298 +2603,102 @@ void advance_sj_state(JOIN *join, table_map remaining_tables,
|
||||
((remaining_tables | new_join_tab->table->map) & sj_inner_tables)))
|
||||
{
|
||||
/* Start tracking potential FirstMatch range */
|
||||
pos->first_firstmatch_table= idx;
|
||||
pos->firstmatch_need_tables= sj_inner_tables;
|
||||
pos->first_firstmatch_rtbl= remaining_tables;
|
||||
first_firstmatch_table= idx;
|
||||
firstmatch_need_tables= sj_inner_tables;
|
||||
first_firstmatch_rtbl= remaining_tables;
|
||||
}
|
||||
|
||||
if (pos->in_firstmatch_prefix())
|
||||
if (in_firstmatch_prefix())
|
||||
{
|
||||
if (outer_corr_tables & pos->first_firstmatch_rtbl)
|
||||
if (outer_corr_tables & first_firstmatch_rtbl)
|
||||
{
|
||||
/*
|
||||
Trying to add an sj-inner table whose sj-nest has an outer correlated
|
||||
table that was not in the prefix. This means FirstMatch can't be used.
|
||||
*/
|
||||
pos->invalidate_firstmatch_prefix();
|
||||
invalidate_firstmatch_prefix();
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Record that we need all of this semi-join's inner tables, too */
|
||||
pos->firstmatch_need_tables|= sj_inner_tables;
|
||||
firstmatch_need_tables|= sj_inner_tables;
|
||||
}
|
||||
|
||||
if (pos->in_firstmatch_prefix() &&
|
||||
!(pos->firstmatch_need_tables & remaining_tables))
|
||||
if (in_firstmatch_prefix() &&
|
||||
!(firstmatch_need_tables & remaining_tables))
|
||||
{
|
||||
/*
|
||||
Got a complete FirstMatch range.
|
||||
Calculate correct costs and fanout
|
||||
*/
|
||||
optimize_wo_join_buffering(join, pos->first_firstmatch_table, idx,
|
||||
optimize_wo_join_buffering(join, first_firstmatch_table, idx,
|
||||
remaining_tables, FALSE, idx,
|
||||
current_record_count,
|
||||
current_read_time);
|
||||
record_count,
|
||||
read_time);
|
||||
/*
|
||||
We don't yet know what are the other strategies, so pick the
|
||||
FirstMatch.
|
||||
|
||||
We ought to save the alternate POSITIONs produced by
|
||||
optimize_wo_join_buffering but the problem is that providing save
|
||||
space uses too much space. Instead, we will re-calculate the
|
||||
alternate POSITIONs after we've picked the best QEP.
|
||||
*/
|
||||
pos->sj_strategy= SJ_OPT_FIRST_MATCH;
|
||||
handled_by_fm_or_ls= pos->firstmatch_need_tables;
|
||||
*handled_fanout= firstmatch_need_tables;
|
||||
/* *record_count and *read_time were set by the above call */
|
||||
*strategy= SJ_OPT_FIRST_MATCH;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* LooseScan Strategy */
|
||||
{
|
||||
POSITION *first=join->positions+pos->first_loosescan_table;
|
||||
/*
|
||||
LooseScan strategy can't handle interleaving between tables from the
|
||||
semi-join that LooseScan is handling and any other tables.
|
||||
|
||||
If we were considering LooseScan for the join prefix (1)
|
||||
and the table we're adding creates an interleaving (2)
|
||||
then
|
||||
stop considering loose scan
|
||||
*/
|
||||
if ((pos->first_loosescan_table != MAX_TABLES) && // (1)
|
||||
(first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2)
|
||||
new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2)
|
||||
{
|
||||
pos->first_loosescan_table= MAX_TABLES;
|
||||
}
|
||||
|
||||
/*
|
||||
If we got an option to use LooseScan for the current table, start
|
||||
considering using LooseScan strategy
|
||||
*/
|
||||
if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join)
|
||||
{
|
||||
pos->first_loosescan_table= idx;
|
||||
pos->loosescan_need_tables=
|
||||
new_join_tab->emb_sj_nest->sj_inner_tables |
|
||||
new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
|
||||
new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
|
||||
}
|
||||
|
||||
if ((pos->first_loosescan_table != MAX_TABLES) &&
|
||||
!(remaining_tables & pos->loosescan_need_tables) &&
|
||||
(pos->table->table->map & pos->loosescan_need_tables))
|
||||
{
|
||||
/*
|
||||
Ok we have LooseScan plan and also have all LooseScan sj-nest's
|
||||
inner tables and outer correlated tables into the prefix.
|
||||
*/
|
||||
|
||||
first=join->positions + pos->first_loosescan_table;
|
||||
uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables);
|
||||
/* Got a complete LooseScan range. Calculate its cost */
|
||||
/*
|
||||
The same problem as with FirstMatch - we need to save POSITIONs
|
||||
somewhere but reserving space for all cases would require too
|
||||
much space. We will re-calculate POSITION structures later on.
|
||||
*/
|
||||
optimize_wo_join_buffering(join, pos->first_loosescan_table, idx,
|
||||
remaining_tables,
|
||||
TRUE, //first_alt
|
||||
disable_jbuf ? join->table_count :
|
||||
pos->first_loosescan_table + n_tables,
|
||||
current_record_count,
|
||||
current_read_time);
|
||||
/*
|
||||
We don't yet have any other strategies that could handle this
|
||||
semi-join nest (the other options are Duplicate Elimination or
|
||||
Materialization, which need at least the same set of tables in
|
||||
the join prefix to be considered) so unconditionally pick the
|
||||
LooseScan.
|
||||
*/
|
||||
pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
|
||||
handled_by_fm_or_ls= first->table->emb_sj_nest->sj_inner_tables;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Update join->cur_sj_inner_tables (Used by FirstMatch in this function and
|
||||
LooseScan detector in best_access_path)
|
||||
*/
|
||||
if ((emb_sj_nest= new_join_tab->emb_sj_nest))
|
||||
{
|
||||
join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
|
||||
join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables;
|
||||
|
||||
/* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
|
||||
if (!(remaining_tables &
|
||||
emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
|
||||
join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
|
||||
}
|
||||
join->cur_dups_producing_tables &= ~handled_by_fm_or_ls;
|
||||
|
||||
/* 4. SJ-Materialization and SJ-Materialization-scan strategy handler */
|
||||
bool sjm_scan;
|
||||
SJ_MATERIALIZATION_INFO *mat_info;
|
||||
if ((mat_info= at_sjmat_pos(join, remaining_tables,
|
||||
new_join_tab, idx, &sjm_scan)))
|
||||
{
|
||||
if (sjm_scan)
|
||||
{
|
||||
/*
|
||||
We can't yet evaluate this option yet. This is because we can't
|
||||
accout for fanout of sj-inner tables yet:
|
||||
|
||||
ntX SJM-SCAN(it1 ... itN) | ot1 ... otN |
|
||||
^(1) ^(2)
|
||||
|
||||
we're now at position (1). SJM temptable in general has multiple
|
||||
records, so at point (1) we'll get the fanout from sj-inner tables (ie
|
||||
there will be multiple record combinations).
|
||||
|
||||
The final join result will not contain any semi-join produced
|
||||
fanout, i.e. tables within SJM-SCAN(...) will not contribute to
|
||||
the cardinality of the join output. Extra fanout produced by
|
||||
SJM-SCAN(...) will be 'absorbed' into fanout produced by ot1 ... otN.
|
||||
|
||||
The simple way to model this is to remove SJM-SCAN(...) fanout once
|
||||
we reach the point #2.
|
||||
*/
|
||||
pos->sjm_scan_need_tables=
|
||||
new_join_tab->emb_sj_nest->sj_inner_tables |
|
||||
new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
|
||||
new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
|
||||
pos->sjm_scan_last_inner= idx;
|
||||
}
|
||||
void Duplicate_weedout_picker::set_from_prev(POSITION *prev)
|
||||
{
|
||||
if (prev->dups_weedout_picker.is_used)
|
||||
set_empty();
|
||||
else
|
||||
{
|
||||
/* This is SJ-Materialization with lookups */
|
||||
COST_VECT prefix_cost;
|
||||
signed int first_tab= (int)idx - mat_info->tables;
|
||||
double prefix_rec_count;
|
||||
if (first_tab < (int)join->const_tables)
|
||||
{
|
||||
prefix_cost.zero();
|
||||
prefix_rec_count= 1.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
prefix_cost= join->positions[first_tab].prefix_cost;
|
||||
prefix_rec_count= join->positions[first_tab].prefix_record_count;
|
||||
dupsweedout_tables= prev->dups_weedout_picker.dupsweedout_tables;
|
||||
first_dupsweedout_table= prev->dups_weedout_picker.first_dupsweedout_table;
|
||||
}
|
||||
is_used= FALSE;
|
||||
}
|
||||
|
||||
double mat_read_time= prefix_cost.total_cost();
|
||||
mat_read_time += mat_info->materialization_cost.total_cost() +
|
||||
prefix_rec_count * mat_info->lookup_cost.total_cost();
|
||||
|
||||
if (mat_read_time < *current_read_time || join->cur_dups_producing_tables)
|
||||
{
|
||||
/*
|
||||
NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
|
||||
elements to join->positions as that makes it hard to return things
|
||||
back when making one step back in join optimization. That's done
|
||||
after the QEP has been chosen.
|
||||
*/
|
||||
pos->sj_strategy= SJ_OPT_MATERIALIZE;
|
||||
*current_read_time= mat_read_time;
|
||||
*current_record_count= prefix_rec_count;
|
||||
join->cur_dups_producing_tables&=
|
||||
~new_join_tab->emb_sj_nest->sj_inner_tables;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* 4.A SJM-Scan second phase check */
|
||||
if (pos->sjm_scan_need_tables && /* Have SJM-Scan prefix */
|
||||
!(pos->sjm_scan_need_tables & remaining_tables))
|
||||
{
|
||||
TABLE_LIST *mat_nest=
|
||||
join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest;
|
||||
SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info;
|
||||
|
||||
double prefix_cost;
|
||||
double prefix_rec_count;
|
||||
int first_tab= pos->sjm_scan_last_inner + 1 - mat_info->tables;
|
||||
/* Get the prefix cost */
|
||||
if (first_tab == (int)join->const_tables)
|
||||
{
|
||||
prefix_rec_count= 1.0;
|
||||
prefix_cost= 0.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
prefix_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
|
||||
prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
|
||||
}
|
||||
|
||||
/* Add materialization cost */
|
||||
prefix_cost += mat_info->materialization_cost.total_cost() +
|
||||
prefix_rec_count * mat_info->scan_cost.total_cost();
|
||||
prefix_rec_count *= mat_info->rows;
|
||||
|
||||
uint i;
|
||||
table_map rem_tables= remaining_tables;
|
||||
for (i= idx; i != (first_tab + mat_info->tables - 1); i--)
|
||||
rem_tables |= join->positions[i].table->table->map;
|
||||
|
||||
POSITION curpos, dummy;
|
||||
/* Need to re-run best-access-path as we prefix_rec_count has changed */
|
||||
for (i= first_tab + mat_info->tables; i <= idx; i++)
|
||||
{
|
||||
best_access_path(join, join->positions[i].table, rem_tables, i,
|
||||
disable_jbuf, prefix_rec_count, &curpos, &dummy);
|
||||
prefix_rec_count *= curpos.records_read;
|
||||
prefix_cost += curpos.read_time;
|
||||
}
|
||||
|
||||
/*
|
||||
Use the strategy if
|
||||
* it is cheaper then what we've had, or
|
||||
* we haven't picked any other semi-join strategy yet
|
||||
In the second case, we pick this strategy unconditionally because
|
||||
comparing cost without semi-join duplicate removal with cost with
|
||||
duplicate removal is not an apples-to-apples comparison.
|
||||
*/
|
||||
if (prefix_cost < *current_read_time || join->cur_dups_producing_tables)
|
||||
{
|
||||
pos->sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
|
||||
*current_read_time= prefix_cost;
|
||||
*current_record_count= prefix_rec_count;
|
||||
join->cur_dups_producing_tables&= ~mat_nest->sj_inner_tables;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* 5. Duplicate Weedout strategy handler */
|
||||
{
|
||||
/*
|
||||
Duplicate weedout can be applied after all ON-correlated and
|
||||
correlated
|
||||
*/
|
||||
bool Duplicate_weedout_picker::check_qep(JOIN *join,
|
||||
uint idx,
|
||||
table_map remaining_tables,
|
||||
const JOIN_TAB *new_join_tab,
|
||||
double *record_count,
|
||||
double *read_time,
|
||||
table_map *handled_fanout,
|
||||
sj_strategy_enum *strategy,
|
||||
POSITION *loose_scan_pos
|
||||
)
|
||||
{
|
||||
TABLE_LIST *nest;
|
||||
if ((nest= new_join_tab->emb_sj_nest))
|
||||
{
|
||||
if (!pos->dupsweedout_tables)
|
||||
pos->first_dupsweedout_table= idx;
|
||||
if (!dupsweedout_tables)
|
||||
first_dupsweedout_table= idx;
|
||||
|
||||
pos->dupsweedout_tables |= nest->sj_inner_tables |
|
||||
dupsweedout_tables |= nest->sj_inner_tables |
|
||||
nest->nested_join->sj_depends_on |
|
||||
nest->nested_join->sj_corr_tables;
|
||||
}
|
||||
|
||||
if (pos->dupsweedout_tables)
|
||||
if (dupsweedout_tables)
|
||||
{
|
||||
/* we're in the process of constructing a DuplicateWeedout range */
|
||||
TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding;
|
||||
/* and we've entered an inner side of an outer join*/
|
||||
if (emb && emb->on_expr)
|
||||
pos->dupsweedout_tables |= emb->nested_join->used_tables;
|
||||
dupsweedout_tables |= emb->nested_join->used_tables;
|
||||
}
|
||||
|
||||
if (pos->dupsweedout_tables &&
|
||||
!(remaining_tables &
|
||||
~new_join_tab->table->map & pos->dupsweedout_tables))
|
||||
/* If this is the last table that we need for DuplicateWeedout range */
|
||||
if (dupsweedout_tables && !(remaining_tables & ~new_join_tab->table->map &
|
||||
dupsweedout_tables))
|
||||
{
|
||||
/*
|
||||
Ok, reached a state where we could put a dups weedout point.
|
||||
@ -2566,7 +2714,7 @@ void advance_sj_state(JOIN *join, table_map remaining_tables,
|
||||
We need to calculate the cost in case #2 also because we need to make
|
||||
choice between this join order and others.
|
||||
*/
|
||||
uint first_tab= pos->first_dupsweedout_table;
|
||||
uint first_tab= first_dupsweedout_table;
|
||||
double dups_cost;
|
||||
double prefix_rec_count;
|
||||
double sj_inner_fanout= 1.0;
|
||||
@ -2586,7 +2734,7 @@ void advance_sj_state(JOIN *join, table_map remaining_tables,
|
||||
}
|
||||
|
||||
table_map dups_removed_fanout= 0;
|
||||
for (uint j= pos->first_dupsweedout_table; j <= idx; j++)
|
||||
for (uint j= first_dupsweedout_table; j <= idx; j++)
|
||||
{
|
||||
POSITION *p= join->positions + j;
|
||||
dups_cost += p->read_time;
|
||||
@ -2623,23 +2771,13 @@ void advance_sj_state(JOIN *join, table_map remaining_tables,
|
||||
one_lookup_cost;
|
||||
dups_cost += write_cost + full_lookup_cost;
|
||||
|
||||
/*
|
||||
Use the strategy if
|
||||
* it is cheaper then what we've had, or
|
||||
* we haven't picked any other semi-join strategy yet
|
||||
The second part is necessary because this strategy is the last one
|
||||
to consider (it needs "the most" tables in the prefix) and we can't
|
||||
leave duplicate-producing tables not handled by any strategy.
|
||||
*/
|
||||
if (dups_cost < *current_read_time || join->cur_dups_producing_tables)
|
||||
{
|
||||
pos->sj_strategy= SJ_OPT_DUPS_WEEDOUT;
|
||||
*current_read_time= dups_cost;
|
||||
*current_record_count= prefix_rec_count * sj_outer_fanout;
|
||||
join->cur_dups_producing_tables &= ~dups_removed_fanout;
|
||||
}
|
||||
}
|
||||
*read_time= dups_cost;
|
||||
*record_count= prefix_rec_count * sj_outer_fanout;
|
||||
*handled_fanout= dups_removed_fanout;
|
||||
*strategy= SJ_OPT_DUPS_WEEDOUT;
|
||||
return TRUE;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
@ -2836,11 +2974,11 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
|
||||
}
|
||||
else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
|
||||
{
|
||||
POSITION *first_inner= join->best_positions + pos->sjm_scan_last_inner;
|
||||
POSITION *first_inner= join->best_positions + pos->sjmat_picker.sjm_scan_last_inner;
|
||||
SJ_MATERIALIZATION_INFO *sjm= first_inner->table->emb_sj_nest->sj_mat_info;
|
||||
sjm->is_used= TRUE;
|
||||
sjm->is_sj_scan= TRUE;
|
||||
first= pos->sjm_scan_last_inner - sjm->tables + 1;
|
||||
first= pos->sjmat_picker.sjm_scan_last_inner - sjm->tables + 1;
|
||||
memcpy(join->best_positions + first,
|
||||
sjm->positions, sizeof(POSITION) * sjm->tables);
|
||||
join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
|
||||
@ -2878,7 +3016,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
|
||||
|
||||
if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)
|
||||
{
|
||||
first= pos->first_firstmatch_table;
|
||||
first= pos->firstmatch_picker.first_firstmatch_table;
|
||||
join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH;
|
||||
join->best_positions[first].n_sj_tables= tablenr - first + 1;
|
||||
POSITION dummy; // For loose scan paths
|
||||
@ -2911,7 +3049,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
|
||||
|
||||
if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN)
|
||||
{
|
||||
first= pos->first_loosescan_table;
|
||||
first= pos->loosescan_picker.first_loosescan_table;
|
||||
POSITION *first_pos= join->best_positions + first;
|
||||
POSITION loose_scan_pos; // For loose scan paths
|
||||
double record_count= (first== join->const_tables)? 1.0:
|
||||
@ -2950,7 +3088,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
|
||||
Duplicate Weedout starting at pos->first_dupsweedout_table, ending at
|
||||
this table.
|
||||
*/
|
||||
first= pos->first_dupsweedout_table;
|
||||
first= pos->dups_weedout_picker.first_dupsweedout_table;
|
||||
join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT;
|
||||
join->best_positions[first].n_sj_tables= tablenr - first + 1;
|
||||
}
|
||||
@ -3893,8 +4031,8 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
|
||||
|
||||
/* Calculate key length */
|
||||
keylen= 0;
|
||||
keyno= pos->loosescan_key;
|
||||
for (uint kp=0; kp < pos->loosescan_parts; kp++)
|
||||
keyno= pos->loosescan_picker.loosescan_key;
|
||||
for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++)
|
||||
keylen += tab->table->key_info[keyno].key_part[kp].store_length;
|
||||
|
||||
tab->loosescan_key_len= keylen;
|
||||
|
@ -263,8 +263,8 @@ public:
|
||||
{
|
||||
pos->records_read= best_loose_scan_records;
|
||||
pos->key= best_loose_scan_start_key;
|
||||
pos->loosescan_key= best_loose_scan_key;
|
||||
pos->loosescan_parts= best_max_loose_keypart + 1;
|
||||
pos->loosescan_picker.loosescan_key= best_loose_scan_key;
|
||||
pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1;
|
||||
pos->use_join_buffer= FALSE;
|
||||
pos->table= tab;
|
||||
// todo need ref_depend_map ?
|
||||
@ -277,8 +277,7 @@ public:
|
||||
};
|
||||
|
||||
|
||||
void advance_sj_state(JOIN *join, const table_map remaining_tables,
|
||||
const JOIN_TAB *new_join_tab, uint idx,
|
||||
void advance_sj_state(JOIN *join, const table_map remaining_tables, uint idx,
|
||||
double *current_record_count, double *current_read_time,
|
||||
POSITION *loose_scan_pos);
|
||||
void restore_prev_sj_state(const table_map remaining_tables,
|
||||
|
@ -85,7 +85,7 @@ static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const
|
||||
static bool find_best(JOIN *join,table_map rest_tables,uint index,
|
||||
double record_count,double read_time);
|
||||
static uint cache_record_length(JOIN *join,uint index);
|
||||
static bool get_best_combination(JOIN *join);
|
||||
bool get_best_combination(JOIN *join);
|
||||
static store_key *get_store_key(THD *thd,
|
||||
KEYUSE *keyuse, table_map used_tables,
|
||||
KEY_PART_INFO *key_part, uchar *key_buff,
|
||||
@ -4883,7 +4883,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
|
||||
join->positions[idx].records_read=1.0; /* This is a const table */
|
||||
join->positions[idx].ref_depend_map= 0;
|
||||
|
||||
join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
|
||||
// join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
|
||||
join->positions[idx].sj_strategy= SJ_OPT_NONE;
|
||||
join->positions[idx].use_join_buffer= FALSE;
|
||||
|
||||
@ -5533,7 +5533,7 @@ best_access_path(JOIN *join,
|
||||
pos->key= best_key;
|
||||
pos->table= s;
|
||||
pos->ref_depend_map= best_ref_depends_map;
|
||||
pos->loosescan_key= MAX_KEY;
|
||||
pos->loosescan_picker.loosescan_key= MAX_KEY;
|
||||
pos->use_join_buffer= best_uses_jbuf;
|
||||
|
||||
loose_scan_opt.save_to_position(s, loose_scan_pos);
|
||||
@ -5840,7 +5840,7 @@ optimize_straight_join(JOIN *join, table_map join_tables)
|
||||
/* compute the cost of the new plan extended with 's' */
|
||||
record_count*= join->positions[idx].records_read;
|
||||
read_time+= join->positions[idx].read_time;
|
||||
advance_sj_state(join, join_tables, s, idx, &record_count, &read_time,
|
||||
advance_sj_state(join, join_tables, idx, &record_count, &read_time,
|
||||
&loose_scan_pos);
|
||||
|
||||
join_tables&= ~(s->table->map);
|
||||
@ -6356,7 +6356,7 @@ best_extension_by_limited_search(JOIN *join,
|
||||
current_record_count= record_count * position->records_read;
|
||||
current_read_time= read_time + position->read_time;
|
||||
|
||||
advance_sj_state(join, remaining_tables, s, idx, ¤t_record_count,
|
||||
advance_sj_state(join, remaining_tables, idx, ¤t_record_count,
|
||||
¤t_read_time, &loose_scan_pos);
|
||||
|
||||
/* Expand only partial plans with lower cost than the best QEP so far */
|
||||
@ -6513,7 +6513,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
|
||||
*/
|
||||
double current_record_count=record_count*records;
|
||||
double current_read_time=read_time+best;
|
||||
advance_sj_state(join, rest_tables, s, idx, ¤t_record_count,
|
||||
advance_sj_state(join, rest_tables, idx, ¤t_record_count,
|
||||
¤t_read_time, &loose_scan_pos);
|
||||
|
||||
if (best_record_count > current_record_count ||
|
||||
@ -7013,7 +7013,7 @@ static Item * const null_ptr= NULL;
|
||||
TRUE Out of memory
|
||||
*/
|
||||
|
||||
static bool
|
||||
bool
|
||||
get_best_combination(JOIN *join)
|
||||
{
|
||||
uint tablenr;
|
||||
@ -7091,13 +7091,6 @@ get_best_combination(JOIN *join)
|
||||
|
||||
*j= *join->best_positions[tablenr].table;
|
||||
|
||||
#if 0
|
||||
/* SJ-Materialization is represented with join tab ranges */
|
||||
if (j->sj_strategy == SJ_OPT_MATERIALIZE ||
|
||||
j->sj_strategy == SJ_OPT_MATERIALIZE)
|
||||
j->sj_strategy= SJ_OPT_NONE;
|
||||
#endif
|
||||
|
||||
j->bush_root_tab= sjm_nest_root;
|
||||
|
||||
form=join->table[tablenr]=j->table;
|
||||
@ -7120,7 +7113,7 @@ get_best_combination(JOIN *join)
|
||||
(join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN))
|
||||
{
|
||||
j->type=JT_ALL;
|
||||
j->index= join->best_positions[tablenr].loosescan_key;
|
||||
j->index= join->best_positions[tablenr].loosescan_picker.loosescan_key;
|
||||
if (tablenr != join->const_tables)
|
||||
join->full_join=1;
|
||||
}
|
||||
|
339
sql/sql_select.h
339
sql/sql_select.h
@ -158,6 +158,17 @@ enum enum_nested_loop_state
|
||||
};
|
||||
|
||||
|
||||
/* Possible sj_strategy values */
|
||||
enum sj_strategy_enum
|
||||
{
|
||||
SJ_OPT_NONE=0,
|
||||
SJ_OPT_DUPS_WEEDOUT=1,
|
||||
SJ_OPT_LOOSE_SCAN =2,
|
||||
SJ_OPT_FIRST_MATCH =3,
|
||||
SJ_OPT_MATERIALIZE =4,
|
||||
SJ_OPT_MATERIALIZE_SCAN=5
|
||||
};
|
||||
|
||||
/* Values for JOIN_TAB::packed_info */
|
||||
#define TAB_INFO_HAVE_VALUE 1
|
||||
#define TAB_INFO_USING_INDEX 2
|
||||
@ -374,7 +385,7 @@ typedef struct st_join_table {
|
||||
POSITION::sj_strategy field. This field is set up by the
|
||||
fix_semijoin_strategies_for_picked_join_order.
|
||||
*/
|
||||
uint sj_strategy;
|
||||
enum sj_strategy_enum sj_strategy;
|
||||
|
||||
uint n_sj_tables;
|
||||
|
||||
@ -496,66 +507,126 @@ enum_nested_loop_state
|
||||
end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
|
||||
bool end_of_records);
|
||||
|
||||
/* psergey */
|
||||
|
||||
/**
|
||||
Information about a position of table within a join order. Used in join
|
||||
optimization.
|
||||
|
||||
struct st_position;
|
||||
|
||||
class Semi_join_strategy_picker
|
||||
{
|
||||
public:
|
||||
/* Called when starting to build a new join prefix */
|
||||
virtual void set_empty() = 0;
|
||||
|
||||
/*
|
||||
Update internal state after another table has been added to the join
|
||||
prefix
|
||||
*/
|
||||
virtual void set_from_prev(struct st_position *prev) = 0;
|
||||
|
||||
virtual bool check_qep(JOIN *join,
|
||||
uint idx,
|
||||
table_map remaining_tables,
|
||||
const JOIN_TAB *new_join_tab,
|
||||
double *record_count,
|
||||
double *read_time,
|
||||
table_map *handled_fanout,
|
||||
sj_strategy_enum *strategy,
|
||||
struct st_position *loose_scan_pos) = 0;
|
||||
|
||||
virtual void mark_used() = 0;
|
||||
|
||||
virtual ~Semi_join_strategy_picker() {}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
Duplicate Weedout strategy optimization state
|
||||
*/
|
||||
typedef struct st_position
|
||||
|
||||
class Duplicate_weedout_picker : public Semi_join_strategy_picker
|
||||
{
|
||||
/* The first table that the strategy will need to handle */
|
||||
uint first_dupsweedout_table;
|
||||
|
||||
/*
|
||||
Tables that we will need to have in the prefix to do the weedout step
|
||||
(all inner and all outer that the involved semi-joins are correlated with)
|
||||
*/
|
||||
table_map dupsweedout_tables;
|
||||
|
||||
bool is_used;
|
||||
public:
|
||||
void set_empty()
|
||||
{
|
||||
dupsweedout_tables= 0;
|
||||
first_dupsweedout_table= MAX_TABLES;
|
||||
is_used= FALSE;
|
||||
}
|
||||
void set_from_prev(struct st_position *prev);
|
||||
|
||||
bool check_qep(JOIN *join,
|
||||
uint idx,
|
||||
table_map remaining_tables,
|
||||
const JOIN_TAB *new_join_tab,
|
||||
double *record_count,
|
||||
double *read_time,
|
||||
table_map *handled_fanout,
|
||||
sj_strategy_enum *stratey,
|
||||
struct st_position *loose_scan_pos);
|
||||
|
||||
void mark_used() { is_used= TRUE; }
|
||||
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
|
||||
};
|
||||
|
||||
|
||||
class Firstmatch_picker : public Semi_join_strategy_picker
|
||||
{
|
||||
/*
|
||||
The "fanout": number of output rows that will be produced (after
|
||||
pushed down selection condition is applied) per each row combination of
|
||||
previous tables.
|
||||
Index of the first inner table that we intend to handle with this
|
||||
strategy
|
||||
*/
|
||||
double records_read;
|
||||
|
||||
uint first_firstmatch_table;
|
||||
/*
|
||||
Cost accessing the table in course of the entire complete join execution,
|
||||
i.e. cost of one access method use (e.g. 'range' or 'ref' scan ) times
|
||||
number the access method will be invoked.
|
||||
Tables that were not in the join prefix when we've started considering
|
||||
FirstMatch strategy.
|
||||
*/
|
||||
double read_time;
|
||||
JOIN_TAB *table;
|
||||
|
||||
table_map first_firstmatch_rtbl;
|
||||
/*
|
||||
NULL - 'index' or 'range' or 'index_merge' or 'ALL' access is used.
|
||||
Other - [eq_]ref[_or_null] access is used. Pointer to {t.keypart1 = expr}
|
||||
Tables that need to be in the prefix before we can calculate the cost
|
||||
of using FirstMatch strategy.
|
||||
*/
|
||||
KEYUSE *key;
|
||||
table_map firstmatch_need_tables;
|
||||
|
||||
/* If ref-based access is used: bitmap of tables this table depends on */
|
||||
table_map ref_depend_map;
|
||||
bool is_used;
|
||||
|
||||
bool use_join_buffer;
|
||||
bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
|
||||
void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
|
||||
public:
|
||||
void set_empty()
|
||||
{
|
||||
invalidate_firstmatch_prefix();
|
||||
is_used= FALSE;
|
||||
}
|
||||
|
||||
void set_from_prev(struct st_position *prev);
|
||||
bool check_qep(JOIN *join,
|
||||
uint idx,
|
||||
table_map remaining_tables,
|
||||
const JOIN_TAB *new_join_tab,
|
||||
double *record_count,
|
||||
double *read_time,
|
||||
table_map *handled_fanout,
|
||||
sj_strategy_enum *strategy,
|
||||
struct st_position *loose_scan_pos);
|
||||
|
||||
void mark_used() { is_used= TRUE; }
|
||||
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
|
||||
};
|
||||
|
||||
|
||||
/* These form a stack of partial join order costs and output sizes */
|
||||
COST_VECT prefix_cost;
|
||||
double prefix_record_count;
|
||||
|
||||
/*
|
||||
Current optimization state: Semi-join strategy to be used for this
|
||||
and preceding join tables.
|
||||
|
||||
Join optimizer sets this for the *last* join_tab in the
|
||||
duplicate-generating range. That is, in order to interpret this field,
|
||||
one needs to traverse join->[best_]positions array from right to left.
|
||||
When you see a join table with sj_strategy!= SJ_OPT_NONE, some other
|
||||
field (depending on the strategy) tells how many preceding positions
|
||||
this applies to. The values of covered_preceding_positions->sj_strategy
|
||||
must be ignored.
|
||||
*/
|
||||
uint sj_strategy;
|
||||
/*
|
||||
Valid only after fix_semijoin_strategies_for_picked_join_order() call:
|
||||
if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
|
||||
are covered by the specified semi-join strategy
|
||||
*/
|
||||
uint n_sj_tables;
|
||||
|
||||
/* LooseScan strategy members */
|
||||
|
||||
class LooseScan_picker : public Semi_join_strategy_picker
|
||||
{
|
||||
/* The first (i.e. driving) table we're doing loose scan for */
|
||||
uint first_loosescan_table;
|
||||
/*
|
||||
@ -573,36 +644,46 @@ typedef struct st_position
|
||||
uint loosescan_key; // final (one for strategy instance )
|
||||
uint loosescan_parts; /* Number of keyparts to be kept distinct */
|
||||
|
||||
/* FirstMatch strategy */
|
||||
/*
|
||||
Index of the first inner table that we intend to handle with this
|
||||
strategy
|
||||
*/
|
||||
uint first_firstmatch_table;
|
||||
/*
|
||||
Tables that were not in the join prefix when we've started considering
|
||||
FirstMatch strategy.
|
||||
*/
|
||||
table_map first_firstmatch_rtbl;
|
||||
/*
|
||||
Tables that need to be in the prefix before we can calculate the cost
|
||||
of using FirstMatch strategy.
|
||||
*/
|
||||
table_map firstmatch_need_tables;
|
||||
bool is_used;
|
||||
public:
|
||||
void set_empty()
|
||||
{
|
||||
first_loosescan_table= MAX_TABLES;
|
||||
is_used= FALSE;
|
||||
}
|
||||
|
||||
bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
|
||||
void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
|
||||
void set_from_prev(struct st_position *prev);
|
||||
bool check_qep(JOIN *join,
|
||||
uint idx,
|
||||
table_map remaining_tables,
|
||||
const JOIN_TAB *new_join_tab,
|
||||
double *record_count,
|
||||
double *read_time,
|
||||
table_map *handled_fanout,
|
||||
sj_strategy_enum *strategy,
|
||||
struct st_position *loose_scan_pos);
|
||||
void mark_used() { is_used= TRUE; }
|
||||
|
||||
/* Duplicate Weedout strategy */
|
||||
/* The first table that the strategy will need to handle */
|
||||
uint first_dupsweedout_table;
|
||||
/*
|
||||
Tables that we will need to have in the prefix to do the weedout step
|
||||
(all inner and all outer that the involved semi-joins are correlated with)
|
||||
*/
|
||||
table_map dupsweedout_tables;
|
||||
friend class Loose_scan_opt;
|
||||
friend void best_access_path(JOIN *join,
|
||||
JOIN_TAB *s,
|
||||
table_map remaining_tables,
|
||||
uint idx,
|
||||
bool disable_jbuf,
|
||||
double record_count,
|
||||
struct st_position *pos,
|
||||
struct st_position *loose_scan_pos);
|
||||
friend bool get_best_combination(JOIN *join);
|
||||
friend int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
|
||||
uint no_jbuf_after);
|
||||
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
|
||||
};
|
||||
|
||||
|
||||
class Sj_materialization_picker : public Semi_join_strategy_picker
|
||||
{
|
||||
bool is_used;
|
||||
|
||||
/* SJ-Materialization-Scan strategy */
|
||||
/* The last inner table (valid once we're after it) */
|
||||
uint sjm_scan_last_inner;
|
||||
/*
|
||||
@ -612,9 +693,101 @@ typedef struct st_position
|
||||
*/
|
||||
table_map sjm_scan_need_tables;
|
||||
|
||||
table_map prefix_dups_producing_tables;
|
||||
} POSITION;
|
||||
public:
|
||||
void set_empty()
|
||||
{
|
||||
sjm_scan_need_tables= 0;
|
||||
LINT_INIT(sjm_scan_last_inner);
|
||||
is_used= FALSE;
|
||||
}
|
||||
void set_from_prev(struct st_position *prev);
|
||||
bool check_qep(JOIN *join,
|
||||
uint idx,
|
||||
table_map remaining_tables,
|
||||
const JOIN_TAB *new_join_tab,
|
||||
double *record_count,
|
||||
double *read_time,
|
||||
table_map *handled_fanout,
|
||||
sj_strategy_enum *strategy,
|
||||
struct st_position *loose_scan_pos);
|
||||
void mark_used() { is_used= TRUE; }
|
||||
|
||||
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
Information about a position of table within a join order. Used in join
|
||||
optimization.
|
||||
*/
|
||||
typedef struct st_position
|
||||
{
|
||||
/* The table that's put into join order */
|
||||
JOIN_TAB *table;
|
||||
|
||||
/*
|
||||
The "fanout": number of output rows that will be produced (after
|
||||
pushed down selection condition is applied) per each row combination of
|
||||
previous tables.
|
||||
*/
|
||||
double records_read;
|
||||
|
||||
/*
|
||||
Cost accessing the table in course of the entire complete join execution,
|
||||
i.e. cost of one access method use (e.g. 'range' or 'ref' scan ) times
|
||||
number the access method will be invoked.
|
||||
*/
|
||||
double read_time;
|
||||
|
||||
/* Cumulative cost and record count for the join prefix */
|
||||
COST_VECT prefix_cost;
|
||||
double prefix_record_count;
|
||||
|
||||
/*
|
||||
NULL - 'index' or 'range' or 'index_merge' or 'ALL' access is used.
|
||||
Other - [eq_]ref[_or_null] access is used. Pointer to {t.keypart1 = expr}
|
||||
*/
|
||||
KEYUSE *key;
|
||||
|
||||
/* If ref-based access is used: bitmap of tables this table depends on */
|
||||
table_map ref_depend_map;
|
||||
|
||||
/*
|
||||
TRUE <=> join buffering will be used. At the moment this is based on
|
||||
*very* imprecise guesses made in best_access_path().
|
||||
*/
|
||||
bool use_join_buffer;
|
||||
|
||||
/*
|
||||
Current optimization state: Semi-join strategy to be used for this
|
||||
and preceding join tables.
|
||||
|
||||
Join optimizer sets this for the *last* join_tab in the
|
||||
duplicate-generating range. That is, in order to interpret this field,
|
||||
one needs to traverse join->[best_]positions array from right to left.
|
||||
When you see a join table with sj_strategy!= SJ_OPT_NONE, some other
|
||||
field (depending on the strategy) tells how many preceding positions
|
||||
this applies to. The values of covered_preceding_positions->sj_strategy
|
||||
must be ignored.
|
||||
*/
|
||||
enum sj_strategy_enum sj_strategy;
|
||||
|
||||
/*
|
||||
Valid only after fix_semijoin_strategies_for_picked_join_order() call:
|
||||
if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
|
||||
are covered by the specified semi-join strategy
|
||||
*/
|
||||
uint n_sj_tables;
|
||||
|
||||
table_map prefix_dups_producing_tables;
|
||||
|
||||
table_map inner_tables_handled_with_other_sjs;
|
||||
|
||||
Duplicate_weedout_picker dups_weedout_picker;
|
||||
Firstmatch_picker firstmatch_picker;
|
||||
LooseScan_picker loosescan_picker;
|
||||
Sj_materialization_picker sjmat_picker;
|
||||
} POSITION;
|
||||
|
||||
typedef struct st_rollup
|
||||
{
|
||||
@ -626,18 +799,6 @@ typedef struct st_rollup
|
||||
} ROLLUP;
|
||||
|
||||
|
||||
#define SJ_OPT_NONE 0
|
||||
#define SJ_OPT_DUPS_WEEDOUT 1
|
||||
#define SJ_OPT_LOOSE_SCAN 2
|
||||
#define SJ_OPT_FIRST_MATCH 3
|
||||
#define SJ_OPT_MATERIALIZE 4
|
||||
#define SJ_OPT_MATERIALIZE_SCAN 5
|
||||
|
||||
inline bool sj_is_materialize_strategy(uint strategy)
|
||||
{
|
||||
return strategy >= SJ_OPT_MATERIALIZE;
|
||||
}
|
||||
|
||||
class JOIN_TAB_RANGE: public Sql_alloc
|
||||
{
|
||||
public:
|
||||
|
Reference in New Issue
Block a user