1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-29 05:21:33 +03:00

Semi-join optimizations code cleanup:

- Break down POSITION/advance_sj_state() into four classes 
  representing potential semi-join strategies.

- Treat all strategies uniformly (before, DuplicateWeedout 
  was special as it was the catch-all strategy. Now, we're 
  still relying on it to be the catch-all, but are able to 
  function, e.g., with firstmatch=on,duplicate_weedout=off).

- Update test results (checked)
This commit is contained in:
Sergey Petrunya
2011-11-23 04:25:52 +04:00
parent 7f746fbe74
commit 694ce95557
6 changed files with 780 additions and 489 deletions

View File

@ -323,8 +323,8 @@ WHERE Language='English' AND Percentage > 10 AND
t2.Population > 100000);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 range Population,Country Population 4 NULL 1 Using index condition; Rowid-ordered scan; Start temporary
1 PRIMARY t2 eq_ref PRIMARY,Population PRIMARY 3 test.t1.Country 1 Using where
1 PRIMARY t3 eq_ref PRIMARY,Percentage PRIMARY 33 test.t1.Country,const 1 Using index condition; Using where; End temporary
1 PRIMARY t2 eq_ref PRIMARY,Population PRIMARY 3 test.t1.Country 1 Using where; End temporary
1 PRIMARY t3 eq_ref PRIMARY,Percentage PRIMARY 33 test.t1.Country,const 1 Using index condition; Using where
set optimizer_switch=@bug35674_save_optimizer_switch;
DROP TABLE t1,t2,t3;
CREATE TABLE t1 (

View File

@ -332,8 +332,8 @@ WHERE Language='English' AND Percentage > 10 AND
t2.Population > 100000);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 range Population,Country Population 4 NULL 1 Using index condition; Rowid-ordered scan; Start temporary
1 PRIMARY t2 eq_ref PRIMARY,Population PRIMARY 3 test.t1.Country 1 Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
1 PRIMARY t3 eq_ref PRIMARY,Percentage PRIMARY 33 test.t1.Country,const 1 Using index condition; Using where; End temporary; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
1 PRIMARY t2 eq_ref PRIMARY,Population PRIMARY 3 test.t1.Country 1 Using where; End temporary; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
1 PRIMARY t3 eq_ref PRIMARY,Percentage PRIMARY 33 test.t1.Country,const 1 Using index condition; Using where; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
set optimizer_switch=@bug35674_save_optimizer_switch;
DROP TABLE t1,t2,t3;
CREATE TABLE t1 (

File diff suppressed because it is too large Load Diff

View File

@ -263,8 +263,8 @@ public:
{
pos->records_read= best_loose_scan_records;
pos->key= best_loose_scan_start_key;
pos->loosescan_key= best_loose_scan_key;
pos->loosescan_parts= best_max_loose_keypart + 1;
pos->loosescan_picker.loosescan_key= best_loose_scan_key;
pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1;
pos->use_join_buffer= FALSE;
pos->table= tab;
// todo need ref_depend_map ?
@ -277,8 +277,7 @@ public:
};
void advance_sj_state(JOIN *join, const table_map remaining_tables,
const JOIN_TAB *new_join_tab, uint idx,
void advance_sj_state(JOIN *join, const table_map remaining_tables, uint idx,
double *current_record_count, double *current_read_time,
POSITION *loose_scan_pos);
void restore_prev_sj_state(const table_map remaining_tables,

View File

@ -85,7 +85,7 @@ static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const
static bool find_best(JOIN *join,table_map rest_tables,uint index,
double record_count,double read_time);
static uint cache_record_length(JOIN *join,uint index);
static bool get_best_combination(JOIN *join);
bool get_best_combination(JOIN *join);
static store_key *get_store_key(THD *thd,
KEYUSE *keyuse, table_map used_tables,
KEY_PART_INFO *key_part, uchar *key_buff,
@ -4883,7 +4883,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
join->positions[idx].records_read=1.0; /* This is a const table */
join->positions[idx].ref_depend_map= 0;
join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
// join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
join->positions[idx].sj_strategy= SJ_OPT_NONE;
join->positions[idx].use_join_buffer= FALSE;
@ -5533,7 +5533,7 @@ best_access_path(JOIN *join,
pos->key= best_key;
pos->table= s;
pos->ref_depend_map= best_ref_depends_map;
pos->loosescan_key= MAX_KEY;
pos->loosescan_picker.loosescan_key= MAX_KEY;
pos->use_join_buffer= best_uses_jbuf;
loose_scan_opt.save_to_position(s, loose_scan_pos);
@ -5840,7 +5840,7 @@ optimize_straight_join(JOIN *join, table_map join_tables)
/* compute the cost of the new plan extended with 's' */
record_count*= join->positions[idx].records_read;
read_time+= join->positions[idx].read_time;
advance_sj_state(join, join_tables, s, idx, &record_count, &read_time,
advance_sj_state(join, join_tables, idx, &record_count, &read_time,
&loose_scan_pos);
join_tables&= ~(s->table->map);
@ -6356,7 +6356,7 @@ best_extension_by_limited_search(JOIN *join,
current_record_count= record_count * position->records_read;
current_read_time= read_time + position->read_time;
advance_sj_state(join, remaining_tables, s, idx, &current_record_count,
advance_sj_state(join, remaining_tables, idx, &current_record_count,
&current_read_time, &loose_scan_pos);
/* Expand only partial plans with lower cost than the best QEP so far */
@ -6513,7 +6513,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
*/
double current_record_count=record_count*records;
double current_read_time=read_time+best;
advance_sj_state(join, rest_tables, s, idx, &current_record_count,
advance_sj_state(join, rest_tables, idx, &current_record_count,
&current_read_time, &loose_scan_pos);
if (best_record_count > current_record_count ||
@ -7013,7 +7013,7 @@ static Item * const null_ptr= NULL;
TRUE Out of memory
*/
static bool
bool
get_best_combination(JOIN *join)
{
uint tablenr;
@ -7091,13 +7091,6 @@ get_best_combination(JOIN *join)
*j= *join->best_positions[tablenr].table;
#if 0
/* SJ-Materialization is represented with join tab ranges */
if (j->sj_strategy == SJ_OPT_MATERIALIZE ||
j->sj_strategy == SJ_OPT_MATERIALIZE)
j->sj_strategy= SJ_OPT_NONE;
#endif
j->bush_root_tab= sjm_nest_root;
form=join->table[tablenr]=j->table;
@ -7120,7 +7113,7 @@ get_best_combination(JOIN *join)
(join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN))
{
j->type=JT_ALL;
j->index= join->best_positions[tablenr].loosescan_key;
j->index= join->best_positions[tablenr].loosescan_picker.loosescan_key;
if (tablenr != join->const_tables)
join->full_join=1;
}

View File

@ -158,6 +158,17 @@ enum enum_nested_loop_state
};
/*
  Possible sj_strategy values.

  NOTE(review): sj_is_materialize_strategy() tests
  "strategy >= SJ_OPT_MATERIALIZE", so the numeric order of these values
  appears to be significant -- confirm before renumbering or appending.
*/
enum sj_strategy_enum
{
  SJ_OPT_NONE             = 0,
  SJ_OPT_DUPS_WEEDOUT     = 1,
  SJ_OPT_LOOSE_SCAN       = 2,
  SJ_OPT_FIRST_MATCH      = 3,
  SJ_OPT_MATERIALIZE      = 4,
  SJ_OPT_MATERIALIZE_SCAN = 5
};
/* Values for JOIN_TAB::packed_info */
#define TAB_INFO_HAVE_VALUE 1
#define TAB_INFO_USING_INDEX 2
@ -374,7 +385,7 @@ typedef struct st_join_table {
POSITION::sj_strategy field. This field is set up by the
fix_semijoin_strategies_for_picked_join_order.
*/
uint sj_strategy;
enum sj_strategy_enum sj_strategy;
uint n_sj_tables;
@ -496,66 +507,126 @@ enum_nested_loop_state
end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
bool end_of_records);
/* psergey */
/**
Information about a position of table within a join order. Used in join
optimization.
struct st_position;
/*
  Abstract interface shared by the semi-join strategy picker classes
  (Duplicate_weedout_picker, Firstmatch_picker, LooseScan_picker,
  Sj_materialization_picker).
*/
class Semi_join_strategy_picker
{
public:
  /* Reset the picker; invoked when a new join prefix is being started */
  virtual void set_empty() = 0;

  /*
    Bring the internal state up to date once one more table has been
    appended to the join prefix
  */
  virtual void set_from_prev(struct st_position *prev) = 0;

  /*
    NOTE(review): the implementations are not visible in this header.
    Presumably this checks whether the strategy can handle the join prefix
    that ends at position idx and, if it can, reports the result through
    record_count / read_time / handled_fanout / strategy -- confirm against
    the definitions.
  */
  virtual bool check_qep(JOIN *join, uint idx, table_map remaining_tables,
                         const JOIN_TAB *new_join_tab,
                         double *record_count, double *read_time,
                         table_map *handled_fanout,
                         sj_strategy_enum *strategy,
                         struct st_position *loose_scan_pos) = 0;

  /* Flag the picker as used (the pickers in this file set is_used= TRUE) */
  virtual void mark_used() = 0;

  virtual ~Semi_join_strategy_picker() {}
};
/*
Duplicate Weedout strategy optimization state
*/
typedef struct st_position
class Duplicate_weedout_picker : public Semi_join_strategy_picker
{
  /*
    The first table that the strategy will need to handle.
    set_empty() resets this to MAX_TABLES.
  */
  uint first_dupsweedout_table;

  /*
    Tables that have to be in the prefix before the weedout step can be done
    (all inner tables, and all outer tables that the involved semi-joins are
    correlated with)
  */
  table_map dupsweedout_tables;

  /* Raised by mark_used(), cleared by set_empty() */
  bool is_used;

public:
  /* Called when starting to build a new join prefix */
  void set_empty()
  {
    dupsweedout_tables= 0;
    first_dupsweedout_table= MAX_TABLES;
    is_used= FALSE;
  }

  /* Update internal state after another table was added to the prefix */
  void set_from_prev(struct st_position *prev);

  /*
    Overrides Semi_join_strategy_picker::check_qep(). The parameter names
    match the base class declaration (the original spelled the strategy
    out-parameter "stratey").
  */
  bool check_qep(JOIN *join, uint idx, table_map remaining_tables,
                 const JOIN_TAB *new_join_tab,
                 double *record_count, double *read_time,
                 table_map *handled_fanout,
                 sj_strategy_enum *strategy,
                 struct st_position *loose_scan_pos);

  void mark_used() { is_used= TRUE; }

  /* Needs access to the private strategy state declared above */
  friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
class Firstmatch_picker : public Semi_join_strategy_picker
{
/*
The "fanout": number of output rows that will be produced (after
pushed down selection condition is applied) per each row combination of
previous tables.
Index of the first inner table that we intend to handle with this
strategy
*/
double records_read;
uint first_firstmatch_table;
/*
Cost accessing the table in course of the entire complete join execution,
i.e. cost of one access method use (e.g. 'range' or 'ref' scan ) times
number the access method will be invoked.
Tables that were not in the join prefix when we've started considering
FirstMatch strategy.
*/
double read_time;
JOIN_TAB *table;
table_map first_firstmatch_rtbl;
/*
NULL - 'index' or 'range' or 'index_merge' or 'ALL' access is used.
Other - [eq_]ref[_or_null] access is used. Pointer to {t.keypart1 = expr}
*/
KEYUSE *key;
Tables that need to be in the prefix before we can calculate the cost
of using FirstMatch strategy.
*/
table_map firstmatch_need_tables;
/* If ref-based access is used: bitmap of tables this table depends on */
table_map ref_depend_map;
bool is_used;
bool use_join_buffer;
bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
public:
void set_empty()
{
invalidate_firstmatch_prefix();
is_used= FALSE;
}
void set_from_prev(struct st_position *prev);
bool check_qep(JOIN *join,
uint idx,
table_map remaining_tables,
const JOIN_TAB *new_join_tab,
double *record_count,
double *read_time,
table_map *handled_fanout,
sj_strategy_enum *strategy,
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
/* These form a stack of partial join order costs and output sizes */
COST_VECT prefix_cost;
double prefix_record_count;
/*
Current optimization state: Semi-join strategy to be used for this
and preceding join tables.
Join optimizer sets this for the *last* join_tab in the
duplicate-generating range. That is, in order to interpret this field,
one needs to traverse join->[best_]positions array from right to left.
When you see a join table with sj_strategy!= SJ_OPT_NONE, some other
field (depending on the strategy) tells how many preceding positions
this applies to. The values of covered_preceding_positions->sj_strategy
must be ignored.
*/
uint sj_strategy;
/*
Valid only after fix_semijoin_strategies_for_picked_join_order() call:
if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
are covered by the specified semi-join strategy
*/
uint n_sj_tables;
/* LooseScan strategy members */
class LooseScan_picker : public Semi_join_strategy_picker
{
/* The first (i.e. driving) table we're doing loose scan for */
uint first_loosescan_table;
/*
@ -573,36 +644,46 @@ typedef struct st_position
uint loosescan_key; // final (one for strategy instance )
uint loosescan_parts; /* Number of keyparts to be kept distinct */
/* FirstMatch strategy */
/*
Index of the first inner table that we intend to handle with this
strategy
*/
uint first_firstmatch_table;
/*
Tables that were not in the join prefix when we've started considering
FirstMatch strategy.
*/
table_map first_firstmatch_rtbl;
/*
Tables that need to be in the prefix before we can calculate the cost
of using FirstMatch strategy.
*/
table_map firstmatch_need_tables;
bool is_used;
public:
void set_empty()
{
first_loosescan_table= MAX_TABLES;
is_used= FALSE;
}
bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
void set_from_prev(struct st_position *prev);
bool check_qep(JOIN *join,
uint idx,
table_map remaining_tables,
const JOIN_TAB *new_join_tab,
double *record_count,
double *read_time,
table_map *handled_fanout,
sj_strategy_enum *strategy,
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
/* Duplicate Weedout strategy */
/* The first table that the strategy will need to handle */
uint first_dupsweedout_table;
/*
Tables that we will need to have in the prefix to do the weedout step
(all inner and all outer that the involved semi-joins are correlated with)
*/
table_map dupsweedout_tables;
friend class Loose_scan_opt;
friend void best_access_path(JOIN *join,
JOIN_TAB *s,
table_map remaining_tables,
uint idx,
bool disable_jbuf,
double record_count,
struct st_position *pos,
struct st_position *loose_scan_pos);
friend bool get_best_combination(JOIN *join);
friend int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
uint no_jbuf_after);
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
class Sj_materialization_picker : public Semi_join_strategy_picker
{
bool is_used;
/* SJ-Materialization-Scan strategy */
/* The last inner table (valid once we're after it) */
uint sjm_scan_last_inner;
/*
@ -612,9 +693,101 @@ typedef struct st_position
*/
table_map sjm_scan_need_tables;
table_map prefix_dups_producing_tables;
} POSITION;
public:
void set_empty()
{
sjm_scan_need_tables= 0;
LINT_INIT(sjm_scan_last_inner);
is_used= FALSE;
}
void set_from_prev(struct st_position *prev);
bool check_qep(JOIN *join,
uint idx,
table_map remaining_tables,
const JOIN_TAB *new_join_tab,
double *record_count,
double *read_time,
table_map *handled_fanout,
sj_strategy_enum *strategy,
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
/**
  Describes one position of a join order: the table placed there, how it is
  accessed, and the semi-join optimization state reached at that point.
  Used in join optimization.
*/
typedef struct st_position
{
  /* Table placed at this position of the join order */
  JOIN_TAB *table;

  /*
    The "fanout": how many output rows are produced (after the pushed-down
    selection condition has been applied) for each row combination of the
    preceding tables.
  */
  double records_read;

  /*
    Cost of accessing the table over the whole join execution, i.e. the
    cost of one use of the access method (e.g. a 'range' or 'ref' scan)
    multiplied by the number of times the access method is invoked.
  */
  double read_time;

  /* Cumulative cost and record count for the join prefix */
  COST_VECT prefix_cost;
  double prefix_record_count;

  /*
    NULL  - 'index', 'range', 'index_merge' or 'ALL' access is used.
    Other - [eq_]ref[_or_null] access is used; points to
            {t.keypart1 = expr}
  */
  KEYUSE *key;

  /* For ref-based access: bitmap of the tables this table depends on */
  table_map ref_depend_map;

  /*
    TRUE <=> join buffering will be used. For now this relies on *very*
    imprecise guesses made in best_access_path().
  */
  bool use_join_buffer;

  /*
    Current optimization state: the semi-join strategy to use for this and
    the preceding join tables.

    The join optimizer sets this for the *last* join_tab of the
    duplicate-generating range, so the join->[best_]positions array has to
    be walked from right to left to interpret it. On meeting a join table
    with sj_strategy != SJ_OPT_NONE, another field (which one depends on the
    strategy) tells how many preceding positions are covered. The
    sj_strategy values of those covered positions must be ignored.
  */
  enum sj_strategy_enum sj_strategy;

  /*
    Meaningful only after fix_semijoin_strategies_for_picked_join_order()
    has been called: when sj_strategy != SJ_OPT_NONE, the number of
    subsequent tables covered by the specified semi-join strategy.
  */
  uint n_sj_tables;

  table_map prefix_dups_producing_tables;

  table_map inner_tables_handled_with_other_sjs;

  /* Per-strategy optimization state, one picker object per strategy */
  Duplicate_weedout_picker dups_weedout_picker;
  Firstmatch_picker firstmatch_picker;
  LooseScan_picker loosescan_picker;
  Sj_materialization_picker sjmat_picker;
} POSITION;
typedef struct st_rollup
{
@ -626,18 +799,6 @@ typedef struct st_rollup
} ROLLUP;
#define SJ_OPT_NONE 0
#define SJ_OPT_DUPS_WEEDOUT 1
#define SJ_OPT_LOOSE_SCAN 2
#define SJ_OPT_FIRST_MATCH 3
#define SJ_OPT_MATERIALIZE 4
#define SJ_OPT_MATERIALIZE_SCAN 5
/*
  Tell whether a semi-join strategy code denotes a materialization strategy.

  @param strategy  one of the SJ_OPT_* values
  @return TRUE when strategy is numerically at or above SJ_OPT_MATERIALIZE
*/
inline bool sj_is_materialize_strategy(uint strategy)
{
  const bool at_or_above_materialize= (strategy >= SJ_OPT_MATERIALIZE);
  return at_or_above_materialize;
}
class JOIN_TAB_RANGE: public Sql_alloc
{
public: