1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MDEV-30812: Improve output cardinality estimates for hash join

Introduce @@optimizer_switch flag: hash_join_cardinality

When it is on, use EITS statistics to produce tighter bounds for
hash join output cardinality.

Amended by Monty.

Reviewed by: Monty <monty@mariadb.org>
This commit is contained in:
Sergei Petrunia
2023-03-09 17:04:07 +03:00
parent 2eb7bf1ec3
commit 4329ec5d3b
10 changed files with 363 additions and 53 deletions

View File

@@ -7597,20 +7597,28 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
Estimate how many records we will get if we read just this table and apply
a part of WHERE that can be checked for it.
@param s Current JOIN_TAB
@param use_cond_selectivity Value of optimizer_use_condition_selectivity.
If > 1 then use table->cond_selectivity.
@param force_estimate Set to 1 if we should not call
use_found_constraint. To be deleted in 11.0
@return 0.0 No matching rows
@return >= 1.0 Number of expected matching rows
@detail
Estimate how many records we will get if we
- read the given table with its "independent" access method (either quick
select or full table/index scan),
- apply the part of WHERE that refers only to this table.
@seealso
@see also
table_cond_selectivity() produces selectivity of condition that is checked
after joining rows from this table to rows from preceding tables.
*/
inline
double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint,
uint use_cond_selectivity)
static double apply_selectivity_for_table(JOIN_TAB *s,
uint use_cond_selectivity,
bool *force_estimate)
{
ha_rows records;
double dbl_records;
@@ -7621,34 +7629,47 @@ double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint,
double sel= table->cond_selectivity;
double table_records= rows2double(s->records);
dbl_records= table_records * sel;
*force_estimate= 1; // Don't call use_found_constraint()
return dbl_records;
}
records = s->found_records;
/*
If there is a filtering condition on the table (i.e. ref analyzer found
at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
preceding this table in the join order we're now considering), then
assume that 25% of the rows will be filtered out by this condition.
This heuristic is supposed to force tables used in exprZ to be before
this table in join order.
If applicable, get a more accurate estimate.
*/
if (with_found_constraint)
records-= records/4;
/*
If applicable, get a more accurate estimate. Don't use the two
heuristics at once.
*/
DBUG_ASSERT(s->table->opt_range_condition_rows <= s->found_records);
if (s->table->opt_range_condition_rows != s->found_records)
{
*force_estimate= 1; // Don't call use_found_constraint()
records= s->table->opt_range_condition_rows;
}
dbl_records= (double)records;
return dbl_records;
}
/*
  @brief
    Apply the "found constraint" heuristic to a row estimate.

  @param records  Number of rows expected so far (after selectivity)

  @return Adjusted number of rows.

  @detail
    The ref analyzer has found at least one condition of the form
    "table.keyXpartY= exprZ" where exprZ only refers to tables that precede
    this table in the join order under consideration. We assume such a
    condition filters out 25% of the rows. The purpose of this heuristic is
    to push tables used in exprZ earlier in the join order than this table.
*/
inline double use_found_constraint(double records)
{
  double kept= records - records / 4;
  return kept;
}
/*
Calculate the cost of reading a set of rows through an index
@@ -7705,6 +7726,92 @@ double adjust_quick_cost(double quick_cost, ha_rows records)
}
/*
  @brief
    Compute the fanout of a hash join operation using EITS data.

  @param join              The join being optimized
  @param s                 JOIN_TAB of the table joined via hash join
  @param remaining_tables  Map of tables not yet added to the join prefix
  @param rnd_records       Estimated #rows in s after applying the local
                           part of the WHERE condition
  @param hj_start_key      First KEYUSE element of the hash join key
  @param stats_found       OUT: set to 1 if usable EITS column statistics
                           were found (otherwise left untouched)

  @return
    Estimated number of matching rows per row combination of the preceding
    tables (the hash join fanout).
*/
double hash_join_fanout(JOIN *join, JOIN_TAB *s, table_map remaining_tables,
                        double rnd_records, KEYUSE *hj_start_key,
                        bool *stats_found)
{
  THD *thd= join->thd;
  /*
    Before doing the hash join, we will scan the table and apply the local part
    of the WHERE condition. This will produce rnd_records.

    The EITS statistics describes the entire table. Calling

      table->field[N]->get_avg_frequency()

    produces average #rows in the table with some value.

    What happens if we filter out rows so that rnd_records rows are left?
    Something between the two outcomes:
    A. filtering removes a fraction of rows for each value:
       avg_frequency=avg_frequency * condition_selectivity
    B. filtering removes entire groups of rows with the same value, but
       the remaining groups remain of the same size.

    We make a pessimistic assumption and assume B.

    We also handle an edge case: if rnd_records is less than avg_frequency,
    assume we'll get rnd_records rows with the same value, and return
    rnd_records as the fanout estimate.
  */
  double min_freq= rnd_records;

  Json_writer_object trace_obj(thd, "hash_join_cardinality");

  /*
    There can be multiple KEYUSE elements referring to same or different
    columns:

      KEYUSE(tbl.col1 = ...)
      KEYUSE(tbl.col1 = ...)
      KEYUSE(tbl.col2 = ...)

    Hash join code can use multiple columns: (col1, col2) for joining.
    We need n_distinct({col1, col2}).

    EITS only has statistics on individual columns: n_distinct(col1),
    n_distinct(col2).

    Our current solution is to be very conservative and use the selectivity
    of the one column with the lowest avg_frequency.

    In the future, we should use an approach that cautiously takes into
    account multiple KEYUSEs: either multiply by the number of equalities or
    by sqrt of the second most selective equality.
  */
  Json_writer_array trace_arr(thd, "hash_join_columns");
  /* Walk all KEYUSE elements of this table's hash join key. */
  for (KEYUSE *keyuse= hj_start_key;
       keyuse->table == s->table && is_hash_join_key_no(keyuse->key);
       keyuse++)
  {
    /*
      Only consider equalities whose right-hand side is available in the
      current join prefix, that are still valid, and that are allowed to be
      used from the referenced tables (semi-join materialization rules).
    */
    if (!(remaining_tables & keyuse->used_tables) &&
        (!keyuse->validity_ref || *keyuse->validity_ref) &&
        s->access_from_tables_is_allowed(keyuse->used_tables,
                                         join->sjm_lookup_tables))
    {
      Field *field= s->table->field[keyuse->keypart];
      if (is_eits_usable(field))
      {
        double freq= field->read_stats->get_avg_frequency();

        Json_writer_object trace_field(thd);
        trace_field.add("field",field->field_name.str).
          add("avg_frequency", freq);

        /* Keep the most selective (lowest avg_frequency) column. */
        if (freq < min_freq)
          min_freq= freq;
        *stats_found= 1;
      }
    }
  }
  trace_arr.end();
  trace_obj.add("rows", min_freq);
  return min_freq;
}
/**
Find the best access path for an extension of a partial execution
plan and add this path to the plan.
@@ -8399,11 +8506,44 @@ best_access_path(JOIN *join,
(!(s->table->map & join->outer_join) ||
join->allowed_outer_join_with_cache)) // (2)
{
double fanout;
Json_writer_object trace_access_hash(thd);
double join_sel= 0.1;
trace_access_hash.add("type", "hash");
trace_access_hash.add("index", "hj-key");
double join_sel;
bool stats_found= 0, force_estimate= 0;
/* Estimate the cost of the hash join access to the table */
double rnd_records= matching_candidates_in_table(s, found_constraint,
use_cond_selectivity);
double rnd_records= apply_selectivity_for_table(s, use_cond_selectivity,
&force_estimate);
DBUG_ASSERT(hj_start_key);
if (optimizer_flag(thd, OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY))
{
/*
Starting from this point, rnd_records should not be used anymore.
Use "fanout" for an estimate of # matching records.
*/
fanout= hash_join_fanout(join, s, remaining_tables, rnd_records,
hj_start_key, &stats_found);
join_sel= 1.0; // Don't do the "10% heuristic"
}
if (!stats_found)
{
/*
No OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY or no field statistics
found.
Take into account if there is non constant constraints used with
earlier tables in the where expression.
If yes, this will set fanout to rnd_records/4.
We estimate that there will be HASH_FANOUT (10%)
hash matches / row.
*/
if (found_constraint && !force_estimate)
rnd_records= use_found_constraint(rnd_records);
fanout= rnd_records;
join_sel= 0.1;
}
tmp= s->quick ? s->quick->read_time : s->scan_time();
double cmp_time= (s->records - rnd_records)/TIME_FOR_COMPARE;
@@ -8415,19 +8555,36 @@ best_access_path(JOIN *join,
record_count /
(double) thd->variables.join_buff_size));
tmp= COST_MULT(tmp, refills);
best_time= COST_ADD(tmp,
COST_MULT((record_count*join_sel) / TIME_FOR_COMPARE,
rnd_records));
// Add cost of reading/writing the join buffer
if (optimizer_flag(thd, OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY))
{
/* Set it to be 1/10th of TIME_FOR_COMPARE */
double row_copy_cost= 1.0 / (10*TIME_FOR_COMPARE);
double join_buffer_operations=
COST_ADD(
COST_MULT(record_count, row_copy_cost),
COST_MULT(record_count, fanout * (idx - join->const_tables))
);
double jbuf_use_cost= row_copy_cost * join_buffer_operations;
trace_access_hash.add("jbuf_use_cost", jbuf_use_cost);
tmp= COST_ADD(tmp, jbuf_use_cost);
}
double where_cost= COST_MULT((fanout*join_sel) / TIME_FOR_COMPARE,
record_count);
trace_access_hash.add("extra_cond_check_cost", where_cost);
best_time= COST_ADD(tmp, where_cost);
best= tmp;
records= rnd_records;
records= fanout;
best_key= hj_start_key;
best_ref_depends_map= 0;
best_uses_jbuf= TRUE;
best_filter= 0;
best_type= JT_HASH;
trace_access_hash.add("type", "hash");
trace_access_hash.add("index", "hj-key");
trace_access_hash.add("cost", rnd_records);
trace_access_hash.add("records", records);
trace_access_hash.add("cost", best);
trace_access_hash.add("chosen", true);
}
@@ -8479,9 +8636,13 @@ best_access_path(JOIN *join,
!(s->table->force_index && best_key && !s->quick) && // (4)
!(best_key && s->table->pos_in_table_list->jtbm_subselect)) // (5)
{ // Check full join
double rnd_records= matching_candidates_in_table(s, found_constraint,
use_cond_selectivity);
bool force_estimate= 0;
double rnd_records= apply_selectivity_for_table(s,
use_cond_selectivity,
&force_estimate);
rnd_records= ((found_constraint && !force_estimate) ?
use_found_constraint(rnd_records) :
rnd_records);
/*
Range optimizer never proposes a RANGE if it isn't better
than FULL: so if RANGE is present, it's always preferred to FULL.
@@ -9698,7 +9859,7 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
with previous tables.
For quick selects and full table scans, selectivity of COND(this_table)
is accounted for in matching_candidates_in_table(). Here, we only count
is accounted for in apply_selectivity_for_table(). Here, we only count
selectivity of COND(this_table, previous_tables).
For other access methods, we need to calculate selectivity of the whole
@@ -9900,7 +10061,7 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
/*
The table is accessed with full table scan, or quick select.
Selectivity of COND(table) is already accounted for in
matching_candidates_in_table().
apply_selectivity_for_table().
*/
sel= 1;
}