1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-01 03:47:19 +03:00

Subquery optimization backport:

- Factor out subquery code into sql/opt_subselect.{h,cc}
- Stop using the term "confluent" (was used due to misreading the dictionary)
This commit is contained in:
Sergey Petrunya
2010-02-16 00:53:06 +03:00
parent 4746ddf634
commit e0bb2f90a2
10 changed files with 3911 additions and 3761 deletions

View File

@ -1922,3 +1922,7 @@ libmysqld/examples/mysqltest.cc
extra/libevent/event-config.h
libmysqld/opt_table_elimination.cc
libmysqld/ha_federatedx.cc
libmysqld/multi_range_read.cc
libmysqld/opt_index_cond_pushdown.cc
libmysqld/opt_subselect.cc
libmysqld/sql_join_cache.cc

View File

@ -58,6 +58,7 @@ sqlsources = derror.cc field.cc field_conv.cc strfunc.cc filesort.cc \
log_event.cc rpl_record.cc \
log_event_old.cc rpl_record_old.cc \
protocol.cc net_serv.cc opt_range.cc \
opt_subselect.cc \
opt_sum.cc procedure.cc records.cc sql_acl.cc \
sql_load.cc discover.cc sql_locale.cc \
sql_profile.cc \

View File

@ -60,6 +60,7 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \
ha_ndbcluster_binlog.h ha_ndbcluster_tables.h \
ha_partition.h rpl_constants.h \
opt_range.h protocol.h rpl_tblmap.h rpl_utility.h \
opt_subselect.h \
rpl_reporting.h \
log.h log_slow.h sql_show.h rpl_rli.h rpl_mi.h \
sql_select.h structs.h table.h sql_udf.h hash_filo.h \
@ -102,7 +103,8 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \
unireg.cc des_key_file.cc \
log_event.cc rpl_record.cc \
log_event_old.cc rpl_record_old.cc \
discover.cc time.cc opt_range.cc opt_sum.cc \
discover.cc time.cc opt_range.cc opt_subselect.cc \
opt_sum.cc \
records.cc filesort.cc handler.cc \
ha_partition.cc \
sql_db.cc sql_table.cc sql_rename.cc sql_crypt.cc \

View File

@ -31,10 +31,6 @@
#include "mysql_priv.h"
#include "sql_select.h"
/*
  AND-combine two conditions.

  Returns 'item' unchanged when 'cond' is NULL; otherwise returns a newly
  allocated Item_cond_and built from (cond, item).
*/
inline Item * and_items(Item* cond, Item *item)
{
  if (!cond)
    return item;
  return new Item_cond_and(cond, item);
}
Item_subselect::Item_subselect():
Item_result_field(), value_assigned(0), thd(0), substitution(0),
@ -1899,7 +1895,7 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
object, but we can't know it here, but here we need address correct
reference on left expression.
//psergey: he means confluent cases like "... IN (SELECT 1)"
//psergey: he means degenerate cases like "... IN (SELECT 1)"
*/
if (!optimizer)
{

3455
sql/opt_subselect.cc Normal file

File diff suppressed because it is too large Load Diff

368
sql/opt_subselect.h Normal file
View File

@ -0,0 +1,368 @@
/* */
#ifdef USE_PRAGMA_INTERFACE
#pragma interface /* gcc class implementation */
#endif
int check_and_do_in_subquery_rewrites(JOIN *join);
bool convert_join_subqueries_to_semijoins(JOIN *join);
int pull_out_semijoin_tables(JOIN *join);
bool optimize_semijoin_nests(JOIN *join, table_map all_table_map);
// used by Loose_scan_opt
ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest,
table_map remaining_tables);
/*
This is a class for considering possible loose index scan optimizations.
It's usage pattern is as follows:
best_access_path()
{
Loose_scan_opt opt;
opt.init()
for each index we can do ref access with
{
opt.next_ref_key();
for each keyuse
opt.add_keyuse();
opt.check_ref_access();
}
if (some criteria for range scans)
opt.check_range_access();
opt.get_best_option();
}
*/
class Loose_scan_opt
{
public:
/* All methods must check this before doing anything else */
bool try_loosescan;
/*
If we consider (oe1, .. oeN) IN (SELECT ie1, .. ieN) then ieK=oeK is
called sj-equality. If oeK depends only on preceding tables then such
equality is called 'bound'.
*/
ulonglong bound_sj_equalities;
/* Accumulated properties of ref access we're now considering: */
ulonglong handled_sj_equalities;
key_part_map loose_scan_keyparts;
uint max_loose_keypart;
bool part1_conds_met;
/*
Use of quick select is a special case. Some of its properties:
*/
uint quick_uses_applicable_index;
uint quick_max_loose_keypart;
/* Best loose scan method so far */
uint best_loose_scan_key;
double best_loose_scan_cost;
double best_loose_scan_records;
KEYUSE *best_loose_scan_start_key;
uint best_max_loose_keypart;
Loose_scan_opt():
try_loosescan(FALSE),
bound_sj_equalities(0),
quick_uses_applicable_index(FALSE)
{
UNINIT_VAR(quick_max_loose_keypart); /* Protected by quick_uses_applicable_index */
/* The following are protected by best_loose_scan_cost!= DBL_MAX */
UNINIT_VAR(best_loose_scan_key);
UNINIT_VAR(best_loose_scan_records);
UNINIT_VAR(best_max_loose_keypart);
UNINIT_VAR(best_loose_scan_start_key);
}
void init(JOIN *join, JOIN_TAB *s, table_map remaining_tables)
{
/*
Discover the bound equalities. We need to do this if
1. The next table is an SJ-inner table, and
2. It is the first table from that semijoin, and
3. We're not within a semi-join range (i.e. all semi-joins either have
all or none of their tables in join_table_map), except
s->emb_sj_nest (which we've just entered, see #2).
4. All non-IN-equality correlation references from this sj-nest are
bound
5. But some of the IN-equalities aren't (so this can't be handled by
FirstMatch strategy)
*/
best_loose_scan_cost= DBL_MAX;
if (!join->emb_sjm_nest && s->emb_sj_nest && // (1)
s->emb_sj_nest->sj_in_exprs < 64 &&
((remaining_tables & s->emb_sj_nest->sj_inner_tables) == // (2)
s->emb_sj_nest->sj_inner_tables) && // (2)
join->cur_sj_inner_tables == 0 && // (3)
!(remaining_tables &
s->emb_sj_nest->nested_join->sj_corr_tables) && // (4)
remaining_tables & s->emb_sj_nest->nested_join->sj_depends_on &&// (5)
optimizer_flag(join->thd, OPTIMIZER_SWITCH_LOOSE_SCAN))
{
/* This table is an LooseScan scan candidate */
bound_sj_equalities= get_bound_sj_equalities(s->emb_sj_nest,
remaining_tables);
try_loosescan= TRUE;
DBUG_PRINT("info", ("Will try LooseScan scan, bound_map=%llx",
(longlong)bound_sj_equalities));
}
}
void next_ref_key()
{
handled_sj_equalities=0;
loose_scan_keyparts= 0;
max_loose_keypart= 0;
part1_conds_met= FALSE;
}
void add_keyuse(table_map remaining_tables, KEYUSE *keyuse)
{
if (try_loosescan && keyuse->sj_pred_no != UINT_MAX)
{
if (!(remaining_tables & keyuse->used_tables))
{
/*
This allows to use equality propagation to infer that some
sj-equalities are bound.
*/
bound_sj_equalities |= 1ULL << keyuse->sj_pred_no;
}
else
{
handled_sj_equalities |= 1ULL << keyuse->sj_pred_no;
loose_scan_keyparts |= ((key_part_map)1) << keyuse->keypart;
set_if_bigger(max_loose_keypart, keyuse->keypart);
}
}
}
bool have_a_case() { return test(handled_sj_equalities); }
void check_ref_access_part1(JOIN_TAB *s, uint key, KEYUSE *start_key,
table_map found_part)
{
/*
Check if we can use LooseScan semi-join strategy. We can if
1. This is the right table at right location
2. All IN-equalities are either
- "bound", ie. the outer_expr part refers to the preceding tables
- "handled", ie. covered by the index we're considering
3. Index order allows to enumerate subquery's duplicate groups in
order. This happens when the index definition matches this
pattern:
(handled_col|bound_col)* (other_col|bound_col)
*/
if (try_loosescan && // (1)
(handled_sj_equalities | bound_sj_equalities) == // (2)
PREV_BITS(ulonglong, s->emb_sj_nest->sj_in_exprs) && // (2)
(PREV_BITS(key_part_map, max_loose_keypart+1) & // (3)
(found_part | loose_scan_keyparts)) == // (3)
(found_part | loose_scan_keyparts) && // (3)
!key_uses_partial_cols(s->table, key))
{
/* Ok, can use the strategy */
part1_conds_met= TRUE;
if (s->quick && s->quick->index == key &&
s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
{
quick_uses_applicable_index= TRUE;
quick_max_loose_keypart= max_loose_keypart;
}
DBUG_PRINT("info", ("Can use LooseScan scan"));
/*
Check if this is a special case where there are no usable bound
IN-equalities, i.e. we have
outer_expr IN (SELECT innertbl.key FROM ...)
and outer_expr cannot be evaluated yet, so it's actually full
index scan and not a ref access
*/
if (!(found_part & 1 ) && /* no usable ref access for 1st key part */
s->table->covering_keys.is_set(key))
{
DBUG_PRINT("info", ("Can use full index scan for LooseScan"));
/* Calculate the cost of complete loose index scan. */
double records= rows2double(s->table->file->stats.records);
/* The cost is entire index scan cost (divided by 2) */
double read_time= s->table->file->index_only_read_time(key, records);
/*
Now find out how many different keys we will get (for now we
ignore the fact that we have "keypart_i=const" restriction for
some key components, that may make us think think that loose
scan will produce more distinct records than it actually will)
*/
ulong rpc;
if ((rpc= s->table->key_info[key].rec_per_key[max_loose_keypart]))
records= records / rpc;
// TODO: previous version also did /2
if (read_time < best_loose_scan_cost)
{
best_loose_scan_key= key;
best_loose_scan_cost= read_time;
best_loose_scan_records= records;
best_max_loose_keypart= max_loose_keypart;
best_loose_scan_start_key= start_key;
}
}
}
}
void check_ref_access_part2(uint key, KEYUSE *start_key, double records,
double read_time)
{
if (part1_conds_met && read_time < best_loose_scan_cost)
{
/* TODO use rec-per-key-based fanout calculations */
best_loose_scan_key= key;
best_loose_scan_cost= read_time;
best_loose_scan_records= records;
best_max_loose_keypart= max_loose_keypart;
best_loose_scan_start_key= start_key;
}
}
void check_range_access(JOIN *join, uint idx, QUICK_SELECT_I *quick)
{
/* TODO: this the right part restriction: */
if (quick_uses_applicable_index && idx == join->const_tables &&
quick->read_time < best_loose_scan_cost)
{
best_loose_scan_key= quick->index;
best_loose_scan_cost= quick->read_time;
/* this is ok because idx == join->const_tables */
best_loose_scan_records= rows2double(quick->records);
best_max_loose_keypart= quick_max_loose_keypart;
best_loose_scan_start_key= NULL;
}
}
void save_to_position(JOIN_TAB *tab, POSITION *pos)
{
pos->read_time= best_loose_scan_cost;
if (best_loose_scan_cost != DBL_MAX)
{
pos->records_read= best_loose_scan_records;
pos->key= best_loose_scan_start_key;
pos->loosescan_key= best_loose_scan_key;
pos->loosescan_parts= best_max_loose_keypart + 1;
pos->use_join_buffer= FALSE;
pos->table= tab;
// todo need ref_depend_map ?
DBUG_PRINT("info", ("Produced a LooseScan plan, key %s, %s",
tab->table->key_info[best_loose_scan_key].name,
best_loose_scan_start_key? "(ref access)":
"(range/index access)"));
}
}
};
void advance_sj_state(JOIN *join, const table_map remaining_tables,
const JOIN_TAB *new_join_tab, uint idx,
double *current_record_count, double *current_read_time,
POSITION *loose_scan_pos);
void restore_prev_sj_state(const table_map remaining_tables,
const JOIN_TAB *tab, uint idx);
void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
bool setup_sj_materialization(JOIN_TAB *tab);
TABLE *create_duplicate_weedout_tmp_table(THD *thd, uint uniq_tuple_length_arg,
SJ_TMP_TABLE *sjtbl);
int do_sj_reset(SJ_TMP_TABLE *sj_tbl);
int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl);
/*
Temporary table used by semi-join DuplicateElimination strategy
This consists of the temptable itself and data needed to put records
into it. The table's DDL is as follows:
CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col));
where the primary key can be replaced with unique constraint if n exceeds
the limit (as it is always done for query execution-time temptables).
The record value is a concatenation of rowids of tables from the join we're
executing. If a join table is on the inner side of the outer join, we
assume that its rowid can be NULL and provide means to store this rowid in
the tuple.
*/
class SJ_TMP_TABLE : public Sql_alloc
{
public:
/*
Descriptor of one join table whose rowid is a part of the temporary table
record. The descriptors form an array, see tabs and tabs_end below.
*/
class TAB
{
public:
/* The join table this descriptor is for */
JOIN_TAB *join_tab;
/* Presumably the offset of this table's rowid in the record - TODO confirm */
uint rowid_offset;
/*
Presumably the location (byte and bit) of the "rowid is NULL" flag for
this table within the record - TODO confirm against the code that fills it
*/
ushort null_byte;
uchar null_bit;
};
/*
Array of descriptors of tables whose rowids compose the temporary table
record. NOTE(review): tabs_end looks like a past-the-end pointer - confirm.
*/
TAB *tabs;
TAB *tabs_end;
/*
is_degenerate==TRUE means this is a special case where the temptable record
has zero length (and presence of a unique key means that the temptable can
have either 0 or 1 records).
In this case we don't create the physical temptable but instead record
its state in SJ_TMP_TABLE::have_degenerate_row.
*/
bool is_degenerate;
/*
When is_degenerate==TRUE: the contents of the table (whether it has the
record or not).
*/
bool have_degenerate_row;
/* table record parameters */
uint null_bits;
uint null_bytes;
uint rowid_len;
/* The temporary table itself (NULL means not created yet) */
TABLE *tmp_table;
/*
These are the members we got from temptable creation code. We'll need
them if we need to convert the table from HEAP to MyISAM/Maria.
*/
ENGINE_COLUMNDEF *start_recinfo;
ENGINE_COLUMNDEF *recinfo;
/*
Pointer to next table (next->start_idx > this->end_idx).
NOTE(review): start_idx/end_idx are not members of this class; the
invariant refers to something defined elsewhere.
*/
SJ_TMP_TABLE *next;
};
int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
uint no_jbuf_after);
void destroy_sj_tmp_tables(JOIN *join);
int clear_sj_tmp_tables(JOIN *join);
int rewrite_to_index_subquery_engine(JOIN *join);

View File

@ -29,6 +29,7 @@
#include "mysql_priv.h"
#include "sql_select.h"
#include "opt_subselect.h"
/*****************************************************************************

View File

@ -605,7 +605,7 @@ public:
List<TABLE_LIST> top_join_list; /* join list of the top level */
List<TABLE_LIST> *join_list; /* list for the currently parsed join */
TABLE_LIST *embedding; /* table embedding to the above list */
List<TABLE_LIST> sj_nests;
List<TABLE_LIST> sj_nests; /* Semi-join nests within this join */
/*
Beginning of the list of leaves in a FROM clause, where the leaves
include all base tables including view tables. The tables are connected

File diff suppressed because it is too large Load Diff

View File

@ -28,6 +28,12 @@
#include "procedure.h"
#include <myisam.h>
#if defined(WITH_MARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES)
#include "../storage/maria/ha_maria.h"
#define TMP_ENGINE_HTON maria_hton
#else
#define TMP_ENGINE_HTON myisam_hton
#endif
/* Values in optimize */
#define KEY_OPTIMIZE_EXISTS 1
#define KEY_OPTIMIZE_REF_OR_NULL 2
@ -1195,7 +1201,6 @@ enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool
end_of_records);
enum_nested_loop_state sub_select_sjm(JOIN *join, JOIN_TAB *join_tab,
bool end_of_records);
int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl);
enum_nested_loop_state
end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
@ -1329,74 +1334,6 @@ typedef struct st_rollup
List<Item> *fields;
} ROLLUP;
/*
Temporary table used by semi-join DuplicateElimination strategy
This consists of the temptable itself and data needed to put records
into it. The table's DDL is as follows:
CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col));
where the primary key can be replaced with unique constraint if n exceeds
the limit (as it is always done for query execution-time temptables).
The record value is a concatenation of rowids of tables from the join we're
executing. If a join table is on the inner side of the outer join, we
assume that its rowid can be NULL and provide means to store this rowid in
the tuple.
*/
class SJ_TMP_TABLE : public Sql_alloc
{
public:
/*
Descriptor of one join table whose rowid is a part of the temporary table
record. The descriptors form an array, see tabs and tabs_end below.
*/
class TAB
{
public:
/* The join table this descriptor is for */
JOIN_TAB *join_tab;
/* Presumably the offset of this table's rowid in the record - TODO confirm */
uint rowid_offset;
/*
Presumably the location (byte and bit) of the "rowid is NULL" flag for
this table within the record - TODO confirm against the code that fills it
*/
ushort null_byte;
uchar null_bit;
};
/*
Array of descriptors of tables whose rowids compose the temporary table
record. NOTE(review): tabs_end looks like a past-the-end pointer - confirm.
*/
TAB *tabs;
TAB *tabs_end;
/*
is_confluent==TRUE means this is a special case where the temptable record
has zero length (and presence of a unique key means that the temptable can
have either 0 or 1 records).
In this case we don't create the physical temptable but instead record
its state in SJ_TMP_TABLE::have_confluent_row.
*/
bool is_confluent;
/*
When is_confluent==TRUE: the contents of the table (whether it has the
record or not).
*/
bool have_confluent_row;
/* table record parameters */
uint null_bits;
uint null_bytes;
uint rowid_len;
/* The temporary table itself (NULL means not created yet) */
TABLE *tmp_table;
/*
These are the members we got from temptable creation code. We'll need
them if we need to convert the table from HEAP to MyISAM/Maria.
*/
ENGINE_COLUMNDEF *start_recinfo;
ENGINE_COLUMNDEF *recinfo;
/*
Pointer to next table (next->start_idx > this->end_idx).
NOTE(review): start_idx/end_idx are not members of this class; the
invariant refers to something defined elsewhere.
*/
SJ_TMP_TABLE *next;
};
#define SJ_OPT_NONE 0
#define SJ_OPT_DUPS_WEEDOUT 1
@ -1711,7 +1648,6 @@ public:
Item_sum ***func);
int rollup_send_data(uint idx);
int rollup_write_data(uint idx, TABLE *table);
void remove_subq_pushed_predicates(Item **where);
/**
Release memory and, if possible, the open tables held by this execution
plan (and nested plans). It's used to release some tables before
@ -1763,11 +1699,6 @@ void TEST_join(JOIN *join);
/* Extern functions in sql_select.cc */
bool store_val_in_field(Field *field, Item *val, enum_check_fields check_flag);
TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
ORDER *group, bool distinct, bool save_sum_fields,
ulonglong select_options, ha_rows rows_limit,
char* alias);
void free_tmp_table(THD *thd, TABLE *entry);
void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param,
List<Item> &fields, bool reset_with_sum_func);
bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
@ -1776,10 +1707,6 @@ bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
uint elements, List<Item> &fields);
void copy_fields(TMP_TABLE_PARAM *param);
void copy_funcs(Item **func_ptr);
bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
ENGINE_COLUMNDEF *start_recinfo,
ENGINE_COLUMNDEF **recinfo,
int error, bool ignore_last_dupp_key_error);
uint find_shortest_key(TABLE *table, const key_map *usable_keys);
Field* create_tmp_field_from_field(THD *thd, Field* org_field,
const char *name, TABLE *table,
@ -1955,13 +1882,59 @@ int test_if_item_cache_changed(List<Cached_item> &list);
void calc_used_field_length(THD *thd, JOIN_TAB *join_tab);
int join_init_read_record(JOIN_TAB *tab);
void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
/*
  AND-combine two conditions.

  Returns 'item' unchanged when 'cond' is NULL; otherwise returns a newly
  allocated Item_cond_and built from (cond, item).
*/
inline Item * and_items(Item* cond, Item *item)
{
  if (!cond)
    return item;
  return new Item_cond_and(cond, item);
}
bool choose_plan(JOIN *join,table_map join_tables);
void get_partial_join_cost(JOIN *join, uint n_tables, double *read_time_arg,
double *record_count_arg);
void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
table_map last_remaining_tables,
bool first_alt, uint no_jbuf_before,
double *reopt_rec_count, double *reopt_cost,
double *sj_inner_fanout);
Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
bool *inherited_fl);
bool test_if_ref(COND *root_cond,
Item_field *left_item,Item *right_item);
/*
  Check an optimizer_switch flag.

  Returns TRUE iff at least one of the bits of 'flag' is set in
  thd->variables.optimizer_switch.
*/
inline bool optimizer_flag(THD *thd, uint flag)
{
  const bool flag_is_set= (thd->variables.optimizer_switch & flag) != 0;
  return flag_is_set;
}
/* Table elimination entry point function */
void eliminate_tables(JOIN *join);
/* Index Condition Pushdown entry point function */
void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok);
/****************************************************************************
Temporary table support for SQL Runtime
***************************************************************************/
#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128
#define AVG_STRING_LENGTH_TO_PACK_ROWS 64
#define RATIO_TO_PACK_ROWS 2
#define MIN_STRING_LENGTH_TO_PACK_ROWS 10
TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
ORDER *group, bool distinct, bool save_sum_fields,
ulonglong select_options, ha_rows rows_limit,
char* alias);
void free_tmp_table(THD *thd, TABLE *entry);
bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
ENGINE_COLUMNDEF *start_recinfo,
ENGINE_COLUMNDEF **recinfo,
int error, bool ignore_last_dupp_key_error);
bool create_internal_tmp_table(TABLE *table, KEY *keyinfo,
ENGINE_COLUMNDEF *start_recinfo,
ENGINE_COLUMNDEF **recinfo,
ulonglong options);
bool open_tmp_table(TABLE *table);
void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps);