
Optimizer code cleanups, no logic changes

- Updated comments
- Added some extra DBUG output
- Indentation changes and breaking up long lines
- Trivial code changes (illustrated in the sketch below) such as:
  - Combining two statements into one
  - Reordering DBUG lines
  - Using a variable to store a pointer that is used multiple times
- Moved variable declarations to the start of the loop/function
- Removed dead or commented-out code
- Removed wrong DBUG_EXECUTE code in best_extension_by_limited_search()
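
To make the flavor of these trivial cleanups concrete, here is a small
self-contained C++ sketch (not MariaDB code; the struct and names are
invented for illustration) showing two of the patterns side by side:

  #include <cstdio>

  // Hypothetical stand-ins for the structures touched by the commit.
  struct Range { unsigned rows; double cost; };
  struct Table { Range opt_range[8]; };

  int main()
  {
    Table table;
    unsigned keynr= 3;

    // Pattern: store a pointer that is used multiple times in a variable.
    // Before:  table.opt_range[keynr].rows= 10;
    //          table.opt_range[keynr].cost= 1.5;
    Range *range= table.opt_range + keynr;
    range->rows= 10;
    range->cost= 1.5;

    // Pattern: combine two statements into one.
    // Before:  unsigned rows= range->rows;
    //          set_if_bigger(rows, 1);
    // After (one statement, same result as MY_MAX(range->rows, 1)):
    unsigned rows= range->rows > 1 ? range->rows : 1;

    printf("rows=%u cost=%g\n", rows, range->cost);
    return 0;
  }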
Commit: 4062fc28bd
Parent: 87d4d7232c
Author: Monty
Date: 2021-10-06 12:31:19 +03:00
Committed by: Sergei Petrunia
12 changed files with 283 additions and 230 deletions


@@ -19,8 +19,8 @@
   Fix that MAYBE_KEY are stored in the tree so that we can detect use
   of full hash keys for queries like:
-  select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205);
+  select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and
+  kws.keyword_id in (204,205);
 */
 /*
@@ -2626,7 +2626,8 @@ static int fill_used_fields_bitmap(PARAM *param)
     In the table struct the following information is updated:
       quick_keys                - Which keys can be used
       quick_rows                - How many rows the key matches
-      opt_range_condition_rows  - E(# rows that will satisfy the table condition)
+      opt_range_condition_rows  - E(# rows that will satisfy the table
+                                  condition)
 
   IMPLEMENTATION
     opt_range_condition_rows value is obtained as follows:
@@ -2774,7 +2775,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
   thd->no_errors=1;                            // Don't warn about NULL
   init_sql_alloc(key_memory_quick_range_select_root, &alloc,
-                 thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
+                 thd->variables.range_alloc_block_size, 0,
+                 MYF(MY_THREAD_SPECIFIC));
   if (!(param.key_parts=
         (KEY_PART*) alloc_root(&alloc,
                                sizeof(KEY_PART) *
@@ -2931,6 +2933,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
       TRP_INDEX_INTERSECT *intersect_trp;
       bool can_build_covering= FALSE;
       Json_writer_object trace_range(thd, "analyzing_range_alternatives");
+      TABLE_READ_PLAN *range_trp;
 
       backup_keys= (SEL_ARG**) alloca(sizeof(backup_keys[0])*param.keys);
       memcpy(&backup_keys[0], &tree->keys[0],
@@ -2939,9 +2942,9 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
         remove_nonrange_trees(&param, tree);
 
       /* Get best 'range' plan and prepare data for making other plans */
-      if (auto range_trp= get_key_scans_params(&param, tree,
-                                               only_single_index_range_scan,
-                                               true, best_read_time))
+      if ((range_trp= get_key_scans_params(&param, tree,
+                                           only_single_index_range_scan,
+                                           true, best_read_time)))
       {
         best_trp= range_trp;
         best_read_time= best_trp->read_cost;
@@ -3048,7 +3051,6 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
       {
         grp_summary.add("chosen", true);
         best_trp= group_trp;
-        best_read_time= best_trp->read_cost;
       }
       else
         grp_summary.add("chosen", false).add("cause", "cost");
@@ -3112,7 +3114,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
   SYNOPSIS
     create_key_parts_for_pseudo_indexes()
       param        IN/OUT data structure for the descriptors to be built
-      used_fields  bitmap of columns for which the descriptors are to be built
+      used_fields  bitmap of columns for which the descriptors are to be built
 
   DESCRIPTION
     For each column marked in the bitmap used_fields the function builds
@@ -3209,9 +3211,10 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
   SYNOPSIS
     records_in_column_ranges()
       param  the data structure to access descriptors of pseudo indexes
-             built over columns used in the condition of the processed query
+             built over columns used in the condition of the processed
+             query
       idx    the index of the descriptor of interest in param
-      tree   the tree representing ranges built for the interesting column
+      tree   the tree representing ranges built for the interesting column
 
   DESCRIPTION
     This function retrieves the ranges represented by the SEL_ARG 'tree' and
@@ -3334,7 +3337,8 @@ int cmp_quick_ranges(TABLE *table, uint *a, uint *b)
   DESCRIPTION
     This function calculates the selectivity of range conditions cond imposed
     on the rows of 'table' in the processed query.
-    The calculated selectivity is assigned to the field table->cond_selectivity.
+    The calculated selectivity is assigned to the field
+    table->cond_selectivity.
 
     Selectivity is calculated as a product of selectivities imposed by:
@@ -3346,6 +3350,8 @@ int cmp_quick_ranges(TABLE *table, uint *a, uint *b)
     3. Reading a few records from the table pages and checking the condition
        selectivity (this is used for conditions like "column LIKE '%val%'"
        where approaches #1 and #2 do not provide selectivity data).
+    4. If the selectivity calculated by get_best_ror_intersect() is smaller,
+       use this instead.
 
   NOTE
     Currently the selectivities of range conditions over different columns are
@@ -3362,6 +3368,9 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
   MY_BITMAP *used_fields= &table->cond_set;
   double table_records= (double)table->stat_records();
   uint optimal_key_order[MAX_KEY];
+  MY_BITMAP handled_columns;
+  my_bitmap_map *buf;
+  QUICK_SELECT_I *quick;
   DBUG_ENTER("calculate_cond_selectivity_for_table");
 
   table->cond_selectivity= 1.0;
@@ -3369,7 +3378,6 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
   if (table_records == 0)
     DBUG_RETURN(FALSE);
 
-  QUICK_SELECT_I *quick;
   if ((quick=table->reginfo.join_tab->quick) &&
       quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
   {
@@ -3377,14 +3385,14 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
     DBUG_RETURN(FALSE);
   }
 
-  if (!*cond)
+  if (!*cond || table->pos_in_table_list->schema_table)
     DBUG_RETURN(FALSE);
 
-  if (table->pos_in_table_list->schema_table)
-    DBUG_RETURN(FALSE);
-
-  MY_BITMAP handled_columns;
-  my_bitmap_map* buf;
   /*
     This should be pre-alloced so that we could use the same bitmap for all
     tables. Would also avoid extra memory allocations if this function would
     be called multiple times per query.
   */
   if (!(buf= (my_bitmap_map*)thd->alloc(table->s->column_bitmap_size)))
     DBUG_RETURN(TRUE);
   my_bitmap_init(&handled_columns, buf, table->s->fields);
@@ -3512,7 +3520,8 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
     double rows;
     init_sql_alloc(key_memory_quick_range_select_root, &alloc,
-                   thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
+                   thd->variables.range_alloc_block_size, 0,
+                   MYF(MY_THREAD_SPECIFIC));
     param.thd= thd;
     param.mem_root= &alloc;
     param.old_root= thd->mem_root;
@@ -3531,9 +3540,7 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
     thd->no_errors=1;
-    tree= cond[0]->get_mm_tree(&param, cond);
-    if (!tree)
+    if (!(tree= cond[0]->get_mm_tree(&param, cond)))
       goto free_alloc;
 
     table->reginfo.impossible_range= 0;
@@ -3557,7 +3564,7 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
     for (uint idx= 0; idx < param.keys; idx++)
     {
       SEL_ARG *key= tree->keys[idx];
-      if (key)
+      if (key)                                  // Quick range found for key
       {
         Json_writer_object selectivity_for_column(thd);
         selectivity_for_column.add("column_name", key->field->field_name);
@@ -5671,8 +5678,7 @@ bool create_fields_bitmap(PARAM *param, MY_BITMAP *fields_bitmap)
 static
 int cmp_intersect_index_scan(INDEX_SCAN_INFO **a, INDEX_SCAN_INFO **b)
 {
-  return (*a)->records < (*b)->records ?
-         -1 : (*a)->records == (*b)->records ? 0 : 1;
+  return CMP_NUM((*a)->records, (*b)->records);
 }
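
For reference, CMP_NUM() in the new version is a three-way numeric
comparison helper. A minimal equivalent (a sketch of the behaviour, not the
exact macro definition) looks like this:

  // Sketch: three-way comparison equivalent to the nested ternary removed
  // above; returns -1, 0 or 1 for a < b, a == b and a > b respectively.
  template <typename T>
  static int cmp_num_sketch(T a, T b)
  {
    return a < b ? -1 : (a == b ? 0 : 1);
  }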
@@ -6269,7 +6275,8 @@ bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
   size_t max_memory_size= common_info->max_memory_size;
   records_sent_to_unique+= ext_index_scan_records;
-  cost= Unique::get_use_cost(buff_elems, (size_t) records_sent_to_unique, key_size,
+  cost= Unique::get_use_cost(buff_elems, (size_t) records_sent_to_unique,
+                             key_size,
                              max_memory_size, compare_factor, TRUE,
                              &next->in_memory);
   if (records_filtered_out_by_cpk)
@@ -6584,6 +6591,11 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
     if (bitmap_is_set(&param->needed_fields, key_part->fieldnr-1))
       bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1);
   }
+
+  /*
+    Cost of reading the keys for the rows, which are later stored in the
+    ror queue.
+  */
   ror_scan->index_read_cost=
     param->table->file->keyread_time(ror_scan->keynr, 1, ror_scan->records);
   DBUG_RETURN(ror_scan);
@@ -6895,7 +6907,7 @@ static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info,
       avoid duplicating the inference code)
 
   NOTES
-    Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR-
+    Adding a ROR scan to ROR-intersect "makes sense" if the cost of ROR-
     intersection decreases. The cost of ROR-intersection is calculated as
     follows:
@@ -7057,8 +7069,12 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
 {
   uint idx;
   double min_cost= DBL_MAX;
-  DBUG_ENTER("get_best_ror_intersect");
   THD *thd= param->thd;
+  DBUG_ENTER("get_best_ror_intersect");
+  DBUG_PRINT("enter", ("opt_range_condition_rows: %llu cond_selectivity: %g",
+                       (ulonglong) param->table->opt_range_condition_rows,
+                       param->table->cond_selectivity));
 
   Json_writer_object trace_ror(thd, "analyzing_roworder_intersect");
   if ((tree->n_ror_scans < 2) || !param->table->stat_records() ||
@@ -7267,6 +7283,8 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
                      ? "too few indexes to merge"
                      : "cost");
   }
+  DBUG_PRINT("enter", ("opt_range_condition_rows: %llu",
+                       (ulonglong) param->table->opt_range_condition_rows));
   DBUG_RETURN(trp);
 }
@@ -11535,7 +11553,8 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
                            bool *is_ror_scan)
 {
   SEL_ARG_RANGE_SEQ seq;
-  RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
+  RANGE_SEQ_IF seq_if=
+    {NULL, sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
   handler *file= param->table->file;
   ha_rows rows= HA_POS_ERROR;
   uint keynr= param->real_keynr[idx];
@@ -11603,24 +11622,24 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
         This check is needed as sometimes that table statistics or range
         estimates may be slightly out of sync.
       */
-      rows= table_records;
-      set_if_bigger(rows, 1);
+      rows= MY_MAX(table_records, 1);
       param->quick_rows[keynr]= rows;
     }
     param->possible_keys.set_bit(keynr);
     if (update_tbl_stats)
     {
+      TABLE::OPT_RANGE *range= param->table->opt_range + keynr;
       param->table->opt_range_keys.set_bit(keynr);
-      param->table->opt_range[keynr].key_parts= param->max_key_parts;
-      param->table->opt_range[keynr].ranges= param->range_count;
+      range->key_parts= param->max_key_parts;
+      range->ranges= param->range_count;
       param->table->opt_range_condition_rows=
         MY_MIN(param->table->opt_range_condition_rows, rows);
-      param->table->opt_range[keynr].rows= rows;
-      param->table->opt_range[keynr].cost= cost->total_cost();
+      range->rows= rows;
+      range->cost= cost->total_cost();
       if (param->table->file->is_clustering_key(keynr))
-        param->table->opt_range[keynr].index_only_cost= 0;
+        range->index_only_cost= 0;
       else
-        param->table->opt_range[keynr].index_only_cost= cost->index_only_cost();
+        range->index_only_cost= cost->index_only_cost();
     }
   }
@@ -14821,11 +14840,13 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
   set_if_smaller(num_groups, table_records);
 
   if (used_key_parts > group_key_parts)
-  { /*
+  {
+    /*
       Compute the probability that two ends of a subgroup are inside
      different blocks.
     */
-    keys_per_subgroup= (ha_rows) index_info->actual_rec_per_key(used_key_parts - 1);
+    keys_per_subgroup= (ha_rows) index_info->actual_rec_per_key(used_key_parts -
+                                                                1);
     if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
       p_overlap= 1.0;      /* a block, it will overlap at least two blocks. */
     else
@@ -14849,10 +14870,13 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
       reads the next record without having to re-position to it on every
       group. To make the CPU cost reflect this, we estimate the CPU cost
       as the sum of:
-      1. Cost for evaluating the condition (similarly as for index scan).
+      1. Cost for evaluating the condition for each num_group
+         (1/TIME_FOR_COMPARE) (similarly as for index scan).
       2. Cost for navigating the index structure (assuming a b-tree).
-         Note: We only add the cost for one comparision per block. For a
-         b-tree the number of comparisons will be larger.
+         Note: We only add the cost for one index comparision per block. For a
+         b-tree the number of comparisons will be larger. However the cost
+         is low as all of the upper level b-tree blocks should be in
+         memory.
          TODO: This cost should be provided by the storage engine.
     */
     const double tree_traversal_cost=
@@ -14860,8 +14884,8 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
          log(static_cast<double>(keys_per_block))) *
     1/(2*TIME_FOR_COMPARE);
-  const double cpu_cost= num_groups *
-                         (tree_traversal_cost + 1/TIME_FOR_COMPARE_IDX);
+  const double cpu_cost= (num_groups *
+                          (tree_traversal_cost + 1/TIME_FOR_COMPARE_IDX));
   *read_cost= io_cost + cpu_cost;
   *records= num_groups;
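
Written out, the cost model in this hunk is (a reading of the code above,
assuming the numerator elided from the tree_traversal_cost expression is
log(table_records), i.e. the factor is the estimated height of a b-tree
with fanout keys_per_block):

  tree_traversal_cost ~= ceil(log(table_records) / log(keys_per_block))
                         * 1/(2*TIME_FOR_COMPARE)
  cpu_cost             = num_groups
                         * (tree_traversal_cost + 1/TIME_FOR_COMPARE_IDX)

That is, each of the num_groups groups pays one estimated b-tree descent
(half a key comparison per level, per the Note above) plus one index
condition evaluation.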
@@ -14910,7 +14934,8 @@ TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
                                         group_prefix_len, group_key_parts,
                                         used_key_parts, index_info, index,
                                         read_cost, records, key_infix_len,
-                                        key_infix, parent_alloc, is_index_scan);
+                                        key_infix, parent_alloc,
+                                        is_index_scan);
   if (!quick)
     DBUG_RETURN(NULL);