1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

Added 'records_out' and join_type to POSITION

records_out is the numbers of rows expected to be accepted from a table.
records_read is in contrast the number of rows that the optimizer excepts
to read from the engine.

This patch causes not plan changes. The differences in test results comes
from renaming "records" to "records_read" and printing of record_out in
the optimizer trace.

Other things:
- Renamed table_cond_selectivity() to table_after_join_selectivity()
  to make the purpose of the function more clear.
This commit is contained in:
Monty
2022-04-11 17:59:36 +03:00
committed by Sergei Petrunia
parent 9db877c9ec
commit 2387ee9b45
13 changed files with 1192 additions and 198 deletions

View File

@@ -89,6 +89,18 @@
*/
#define HASH_FANOUT 0.1
/*
The following is used to check that A <= B, but with some margin as the
calculation is done slightly differently (mathematically correct, but
double calculations are not exact).
This is only used when comparing read rows and output rows, which
means that we can assume that both values are >= 0 and B cannot be notable
smaller than A.
*/
#define crash_if_first_double_is_bigger(A,B) DBUG_ASSERT(((A) == 0.0 && (B) == 0.0) || (A)/(B) < 1.0000001)
/* Cost for reading a row through an index */
struct INDEX_READ_COST
{
@@ -318,7 +330,7 @@ static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab,
static bool find_order_in_list(THD *, Ref_ptr_array, TABLE_LIST *, ORDER *,
List<Item> &, List<Item> &, bool, bool, bool);
static double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
static double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
table_map rem_tables);
void set_postjoin_aggr_write_func(JOIN_TAB *tab);
@@ -421,7 +433,7 @@ bool dbug_user_var_equals_str(THD *thd, const char *name, const char* value)
POSITION::POSITION()
{
table= 0;
records_read= cond_selectivity= read_time= 0.0;
records_read= cond_selectivity= read_time= records_out= 0.0;
prefix_record_count= 0.0;
key= 0;
use_join_buffer= 0;
@@ -5818,6 +5830,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
Json_writer_object table_records(thd);
/* Only one matching row */
s->found_records= s->records= 1;
s->records_out= 1.0;
s->read_time=1.0;
s->worst_seeks=1.0;
table_records.add_table_name(s)
@@ -7686,6 +7699,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
join->positions[idx].table= table;
join->positions[idx].key=key;
join->positions[idx].records_read=1.0; /* This is a const table */
join->positions[idx].records_out=1.0; /* This is a const table */
join->positions[idx].cond_selectivity= 1.0;
join->positions[idx].ref_depend_map= 0;
@@ -8022,6 +8036,7 @@ best_access_path(JOIN *join,
my_bool found_constraint= 0;
double best_cost= DBL_MAX;
double records= DBL_MAX;
double records_out= table->stat_records() * table->cond_selectivity;
table_map best_ref_depends_map= 0;
/*
key_dependent is 0 if all key parts could be used or if there was an
@@ -8317,9 +8332,12 @@ best_access_path(JOIN *join,
(1.0 +
((double) (table->s->max_key_length-keyinfo->key_length) /
(double) table->s->max_key_length)));
set_if_smaller(records, (double)s->records);
set_if_smaller(records_out, records);
if (records < 2.0)
records=2.0; /* Can't be as good as a unique */
}
/*
ReuseRangeEstimateForRef-2: We get here if we could not reuse
E(#rows) from range optimizer. Make another try:
@@ -8357,9 +8375,10 @@ best_access_path(JOIN *join,
}
}
/* Calculate the cost of the index access */
INDEX_READ_COST cost= cost_for_index_read(thd, table, key,
(ha_rows) records,
(ha_rows) s->worst_seeks);
INDEX_READ_COST cost=
cost_for_index_read(thd, table, key,
(ha_rows) records,
(ha_rows) s->worst_seeks);
tmp= cost.read_cost;
index_only_cost= cost.index_only_cost;
}
@@ -8631,7 +8650,8 @@ best_access_path(JOIN *join,
records,
tmp,
index_only_cost,
record_count);
record_count,
&records_out);
if (filter)
filter= filter->apply_filter(thd, table, &tmp, &records_after_filter,
&startup_cost,
@@ -8673,6 +8693,7 @@ best_access_path(JOIN *join,
add("chosen", false).
add("cause", cause ? cause : "cost");
}
set_if_smaller(records_out, records);
} /* for each key */
records= best_records;
}
@@ -8726,6 +8747,8 @@ best_access_path(JOIN *join,
/* Estimate the cost of the hash join access to the table */
double rnd_records= matching_candidates_in_table(s, found_constraint,
use_cond_selectivity);
set_if_smaller(records_out, rnd_records);
/*
The following cost calculation is identical to the cost calculation for
the join cache later on, except for the HASH_FANOUT
@@ -8876,7 +8899,8 @@ best_access_path(JOIN *join,
table->best_range_rowid_filter_for_partial_join(key_no, rows2double(range->rows),
range->find_cost,
range->index_only_cost,
record_count);
record_count,
&records_out);
if (filter)
{
double filter_cost= range->fetch_cost;
@@ -8905,6 +8929,7 @@ best_access_path(JOIN *join,
{
type= JT_INDEX_MERGE;
}
set_if_smaller(records_out, records_after_filter);
loose_scan_opt.check_range_access(join, idx, s->quick);
}
else
@@ -8913,7 +8938,10 @@ best_access_path(JOIN *join,
rnd_records= matching_candidates_in_table(s, found_constraint,
use_cond_selectivity);
records_after_filter= rnd_records;
set_if_smaller(records_out, rnd_records);
org_records= rows2double(s->records);
DBUG_ASSERT(rnd_records <= s->records);
/* Estimate cost of reading table. */
@@ -9043,7 +9071,9 @@ best_access_path(JOIN *join,
}
/* Update the cost information for the current partial plan */
crash_if_first_double_is_bigger(records_out, records);
pos->records_read= records;
pos->records_out= records_out;
pos->read_time= best_cost;
pos->key= best_key;
pos->type= best_type;
@@ -9070,7 +9100,7 @@ best_access_path(JOIN *join,
trace_paths.end();
if (unlikely(thd->trace_started()))
print_best_access_for_table(thd, pos, best_type);
print_best_access_for_table(thd, pos);
DBUG_VOID_RETURN;
}
@@ -9575,8 +9605,8 @@ optimize_straight_join(JOIN *join, table_map remaining_tables)
remaining_tables&= ~(s->table->map);
double pushdown_cond_selectivity= 1.0;
if (use_cond_selectivity > 1)
pushdown_cond_selectivity= table_cond_selectivity(join, idx, s,
remaining_tables);
pushdown_cond_selectivity= table_after_join_selectivity(join, idx, s,
remaining_tables);
position->cond_selectivity= pushdown_cond_selectivity;
++idx;
}
@@ -10113,8 +10143,8 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
*/
static
double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
table_map rem_tables)
double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
table_map rem_tables)
{
uint16 ref_keyuse_steps_buf[MAX_REF_PARTS];
uint ref_keyuse_size= MAX_REF_PARTS;
@@ -10890,9 +10920,10 @@ best_extension_by_limited_search(JOIN *join,
pushdown_cond_selectivity= 1.0;
if (use_cond_selectivity > 1)
pushdown_cond_selectivity= table_cond_selectivity(join, idx, s,
remaining_tables &
~real_table_bit);
pushdown_cond_selectivity=
table_after_join_selectivity(join, idx, s,
remaining_tables & ~real_table_bit);
join->positions[idx].cond_selectivity= pushdown_cond_selectivity;
partial_join_cardinality= (current_record_count *
@@ -10903,8 +10934,9 @@ best_extension_by_limited_search(JOIN *join,
.add("selectivity", pushdown_cond_selectivity)
.add("estimated_join_cardinality", partial_join_cardinality);
if ((search_depth > 1) &&
if (search_depth > 1 &&
((remaining_tables & ~real_table_bit) & allowed_tables))
{
/* Recursively expand the current partial plan */
Json_writer_array trace_rest(thd, "rest_of_plan");
@@ -11715,6 +11747,7 @@ bool JOIN::get_best_combination()
*/
SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info;
j->records_read= (sjm->is_sj_scan? sjm->rows : 1.0);
j->records_out= j->records_read;
j->records= (ha_rows) j->records_read;
j->cond_selectivity= 1.0;
JOIN_TAB *jt;
@@ -11770,8 +11803,15 @@ bool JOIN::get_best_combination()
to access join->best_positions[].
*/
j->records_read= best_positions[tablenr].records_read;
j->records_out= best_positions[tablenr].records_out;
j->cond_selectivity= best_positions[tablenr].cond_selectivity;
DBUG_ASSERT(j->cond_selectivity <= 1.0);
crash_if_first_double_is_bigger(j->records_out,
j->records_read *
(j->range_rowid_filter_info ?
j->range_rowid_filter_info->selectivity :
1.0));
map2table[j->table->tablenr]= j;
/* If we've reached the end of sjm nest, switch back to main sequence */
@@ -13071,7 +13111,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
/* Fix for EXPLAIN */
if (sel->quick)
join->best_positions[i].records_read= (double)sel->quick->records;
{
join->best_positions[i].records_read=
(double) sel->quick->records;
set_if_smaller(join->best_positions[i].records_out,
join->best_positions[i].records_read);
}
}
else
{
@@ -18542,9 +18587,10 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
table_map real_table_bit= rs->table->map;
if (join->thd->variables.optimizer_use_condition_selectivity > 1)
{
pushdown_cond_selectivity= table_cond_selectivity(join, i, rs,
reopt_remaining_tables &
~real_table_bit);
pushdown_cond_selectivity=
table_after_join_selectivity(join, i, rs,
reopt_remaining_tables &
~real_table_bit);
}
(*outer_rec_count) *= pushdown_cond_selectivity;
if (!rs->emb_sj_nest)
@@ -22673,7 +22719,7 @@ join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos)
{ // Info for DESCRIBE
tab->info= ET_CONST_ROW_NOT_FOUND;
/* Mark for EXPLAIN that the row was not found */
pos->records_read=0.0;
pos->records_read= pos->records_out= 0.0;
pos->ref_depend_map= 0;
if (!table->pos_in_table_list->outer_join || error > 0)
DBUG_RETURN(error);
@@ -22691,7 +22737,7 @@ join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos)
{
tab->info= ET_UNIQUE_ROW_NOT_FOUND;
/* Mark for EXPLAIN that the row was not found */
pos->records_read=0.0;
pos->records_read= pos->records_out= 0.0;
pos->ref_depend_map= 0;
if (!table->pos_in_table_list->outer_join || error > 0)
DBUG_RETURN(error);