1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-29 05:21:33 +03:00

Semi-join optimizations code cleanup:

- Break down POSITION/advance_sj_state() into four classes 
  representing potential semi-join strategies.

- Treat all strategies uniformly (before, DuplicateWeedout 
  was special as it was the catch-all strategy. Now, we're 
  still relying on it to be the catch-all, but are able to 
  function,e.g. with firstmatch=on,duplicate_weedout=off.

- Update test results (checked)
This commit is contained in:
Sergey Petrunya
2011-11-23 04:25:52 +04:00
parent 7f746fbe74
commit 694ce95557
6 changed files with 780 additions and 489 deletions

View File

@ -158,6 +158,17 @@ enum enum_nested_loop_state
};
/* Possible sj_strategy values */
enum sj_strategy_enum
{
SJ_OPT_NONE=0,
SJ_OPT_DUPS_WEEDOUT=1,
SJ_OPT_LOOSE_SCAN =2,
SJ_OPT_FIRST_MATCH =3,
SJ_OPT_MATERIALIZE =4,
SJ_OPT_MATERIALIZE_SCAN=5
};
/* Values for JOIN_TAB::packed_info */
#define TAB_INFO_HAVE_VALUE 1
#define TAB_INFO_USING_INDEX 2
@ -374,7 +385,7 @@ typedef struct st_join_table {
POSITION::sj_strategy field. This field is set up by the
fix_semijoin_strategies_for_picked_join_order.
*/
uint sj_strategy;
enum sj_strategy_enum sj_strategy;
uint n_sj_tables;
@ -496,66 +507,126 @@ enum_nested_loop_state
end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
bool end_of_records);
/* psergey */
/**
Information about a position of table within a join order. Used in join
optimization.
*/
typedef struct st_position
struct st_position;
class Semi_join_strategy_picker
{
/*
The "fanout": number of output rows that will be produced (after
pushed down selection condition is applied) per each row combination of
previous tables.
*/
double records_read;
public:
/* Called when starting to build a new join prefix */
virtual void set_empty() = 0;
/*
Cost accessing the table in course of the entire complete join execution,
i.e. cost of one access method use (e.g. 'range' or 'ref' scan ) times
number the access method will be invoked.
Update internal state after another table has been added to the join
prefix
*/
double read_time;
JOIN_TAB *table;
/*
NULL - 'index' or 'range' or 'index_merge' or 'ALL' access is used.
Other - [eq_]ref[_or_null] access is used. Pointer to {t.keypart1 = expr}
*/
KEYUSE *key;
/* If ref-based access is used: bitmap of tables this table depends on */
table_map ref_depend_map;
bool use_join_buffer;
virtual void set_from_prev(struct st_position *prev) = 0;
virtual bool check_qep(JOIN *join,
uint idx,
table_map remaining_tables,
const JOIN_TAB *new_join_tab,
double *record_count,
double *read_time,
table_map *handled_fanout,
sj_strategy_enum *strategy,
struct st_position *loose_scan_pos) = 0;
virtual void mark_used() = 0;
virtual ~Semi_join_strategy_picker() {}
};
/*
Duplicate Weedout strategy optimization state
*/
class Duplicate_weedout_picker : public Semi_join_strategy_picker
{
/* The first table that the strategy will need to handle */
uint first_dupsweedout_table;
/*
Tables that we will need to have in the prefix to do the weedout step
(all inner and all outer that the involved semi-joins are correlated with)
*/
table_map dupsweedout_tables;
/* These form a stack of partial join order costs and output sizes */
COST_VECT prefix_cost;
double prefix_record_count;
bool is_used;
public:
void set_empty()
{
dupsweedout_tables= 0;
first_dupsweedout_table= MAX_TABLES;
is_used= FALSE;
}
void set_from_prev(struct st_position *prev);
bool check_qep(JOIN *join,
uint idx,
table_map remaining_tables,
const JOIN_TAB *new_join_tab,
double *record_count,
double *read_time,
table_map *handled_fanout,
sj_strategy_enum *stratey,
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
class Firstmatch_picker : public Semi_join_strategy_picker
{
/*
Current optimization state: Semi-join strategy to be used for this
and preceding join tables.
Join optimizer sets this for the *last* join_tab in the
duplicate-generating range. That is, in order to interpret this field,
one needs to traverse join->[best_]positions array from right to left.
When you see a join table with sj_strategy!= SJ_OPT_NONE, some other
field (depending on the strategy) tells how many preceding positions
this applies to. The values of covered_preceding_positions->sj_strategy
must be ignored.
Index of the first inner table that we intend to handle with this
strategy
*/
uint sj_strategy;
uint first_firstmatch_table;
/*
Valid only after fix_semijoin_strategies_for_picked_join_order() call:
if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
are covered by the specified semi-join strategy
Tables that were not in the join prefix when we've started considering
FirstMatch strategy.
*/
uint n_sj_tables;
table_map first_firstmatch_rtbl;
/*
Tables that need to be in the prefix before we can calculate the cost
of using FirstMatch strategy.
*/
table_map firstmatch_need_tables;
/* LooseScan strategy members */
bool is_used;
bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
public:
void set_empty()
{
invalidate_firstmatch_prefix();
is_used= FALSE;
}
void set_from_prev(struct st_position *prev);
bool check_qep(JOIN *join,
uint idx,
table_map remaining_tables,
const JOIN_TAB *new_join_tab,
double *record_count,
double *read_time,
table_map *handled_fanout,
sj_strategy_enum *strategy,
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
class LooseScan_picker : public Semi_join_strategy_picker
{
/* The first (i.e. driving) table we're doing loose scan for */
uint first_loosescan_table;
/*
@ -573,36 +644,46 @@ typedef struct st_position
uint loosescan_key; // final (one for strategy instance )
uint loosescan_parts; /* Number of keyparts to be kept distinct */
/* FirstMatch strategy */
/*
Index of the first inner table that we intend to handle with this
strategy
*/
uint first_firstmatch_table;
/*
Tables that were not in the join prefix when we've started considering
FirstMatch strategy.
*/
table_map first_firstmatch_rtbl;
/*
Tables that need to be in the prefix before we can calculate the cost
of using FirstMatch strategy.
*/
table_map firstmatch_need_tables;
bool is_used;
public:
void set_empty()
{
first_loosescan_table= MAX_TABLES;
is_used= FALSE;
}
bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
void set_from_prev(struct st_position *prev);
bool check_qep(JOIN *join,
uint idx,
table_map remaining_tables,
const JOIN_TAB *new_join_tab,
double *record_count,
double *read_time,
table_map *handled_fanout,
sj_strategy_enum *strategy,
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
/* Duplicate Weedout strategy */
/* The first table that the strategy will need to handle */
uint first_dupsweedout_table;
/*
Tables that we will need to have in the prefix to do the weedout step
(all inner and all outer that the involved semi-joins are correlated with)
*/
table_map dupsweedout_tables;
friend class Loose_scan_opt;
friend void best_access_path(JOIN *join,
JOIN_TAB *s,
table_map remaining_tables,
uint idx,
bool disable_jbuf,
double record_count,
struct st_position *pos,
struct st_position *loose_scan_pos);
friend bool get_best_combination(JOIN *join);
friend int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
uint no_jbuf_after);
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
class Sj_materialization_picker : public Semi_join_strategy_picker
{
bool is_used;
/* SJ-Materialization-Scan strategy */
/* The last inner table (valid once we're after it) */
uint sjm_scan_last_inner;
/*
@ -612,9 +693,101 @@ typedef struct st_position
*/
table_map sjm_scan_need_tables;
table_map prefix_dups_producing_tables;
} POSITION;
public:
void set_empty()
{
sjm_scan_need_tables= 0;
LINT_INIT(sjm_scan_last_inner);
is_used= FALSE;
}
void set_from_prev(struct st_position *prev);
bool check_qep(JOIN *join,
uint idx,
table_map remaining_tables,
const JOIN_TAB *new_join_tab,
double *record_count,
double *read_time,
table_map *handled_fanout,
sj_strategy_enum *strategy,
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
/**
Information about a position of table within a join order. Used in join
optimization.
*/
typedef struct st_position
{
/* The table that's put into join order */
JOIN_TAB *table;
/*
The "fanout": number of output rows that will be produced (after
pushed down selection condition is applied) per each row combination of
previous tables.
*/
double records_read;
/*
Cost accessing the table in course of the entire complete join execution,
i.e. cost of one access method use (e.g. 'range' or 'ref' scan ) times
number the access method will be invoked.
*/
double read_time;
/* Cumulative cost and record count for the join prefix */
COST_VECT prefix_cost;
double prefix_record_count;
/*
NULL - 'index' or 'range' or 'index_merge' or 'ALL' access is used.
Other - [eq_]ref[_or_null] access is used. Pointer to {t.keypart1 = expr}
*/
KEYUSE *key;
/* If ref-based access is used: bitmap of tables this table depends on */
table_map ref_depend_map;
/*
TRUE <=> join buffering will be used. At the moment this is based on
*very* imprecise guesses made in best_access_path().
*/
bool use_join_buffer;
/*
Current optimization state: Semi-join strategy to be used for this
and preceding join tables.
Join optimizer sets this for the *last* join_tab in the
duplicate-generating range. That is, in order to interpret this field,
one needs to traverse join->[best_]positions array from right to left.
When you see a join table with sj_strategy!= SJ_OPT_NONE, some other
field (depending on the strategy) tells how many preceding positions
this applies to. The values of covered_preceding_positions->sj_strategy
must be ignored.
*/
enum sj_strategy_enum sj_strategy;
/*
Valid only after fix_semijoin_strategies_for_picked_join_order() call:
if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
are covered by the specified semi-join strategy
*/
uint n_sj_tables;
table_map prefix_dups_producing_tables;
table_map inner_tables_handled_with_other_sjs;
Duplicate_weedout_picker dups_weedout_picker;
Firstmatch_picker firstmatch_picker;
LooseScan_picker loosescan_picker;
Sj_materialization_picker sjmat_picker;
} POSITION;
typedef struct st_rollup
{
@ -626,18 +799,6 @@ typedef struct st_rollup
} ROLLUP;
#define SJ_OPT_NONE 0
#define SJ_OPT_DUPS_WEEDOUT 1
#define SJ_OPT_LOOSE_SCAN 2
#define SJ_OPT_FIRST_MATCH 3
#define SJ_OPT_MATERIALIZE 4
#define SJ_OPT_MATERIALIZE_SCAN 5
inline bool sj_is_materialize_strategy(uint strategy)
{
return strategy >= SJ_OPT_MATERIALIZE;
}
class JOIN_TAB_RANGE: public Sql_alloc
{
public:
@ -808,7 +969,7 @@ public:
they produce.
*/
table_map cur_dups_producing_tables;
/* We also maintain a stack of join optimization states in * join->positions[] */
/******* Join optimization state members end *******/
/*