From 515b9ad05a6de9dac3871ef2769dde1b5834c6e3 Mon Sep 17 00:00:00 2001 From: Monty Date: Thu, 2 Jun 2022 19:47:23 +0300 Subject: [PATCH] Added EQ_REF chaining to the greedy_optimizer MDEV-28073 Slow query performance in MariaDB when using many tables The idea is to prefer and chain EQ_REF tables (tables that use a unique key to find a row) when searching for the best table combination. This significantly reduces the row combinations that have to be examined. This optimization is enabled when setting optimizer_prune_level=2 (which is now the default). Implementation: - optimizer_prune_level has a new level, 2, which enables EQ_REF optimization in addition to the pruning done by level 1. Level 2 is now the default. - Added JOIN::eq_ref_tables that contains bits of tables that could potentially use EQ_REF access in the query. This is calculated in sort_and_filter_keyuse(). Under optimizer_prune_level=2: - When the greedy_optimizer notices that the preceding table was an EQ_REF table, it tries to add an EQ_REF table next. If an EQ_REF table exists, only this one will be considered at this level. We also collect all EQ_REF tables chained by the next levels and these are ignored on the starting level as we have already examined these. If no EQ_REF table exists, we continue as normal. This optimization speeds up the greedy_optimizer combination test by ~25%. Other things: - I ported the changes in MySQL 5.7 to greedy_optimizer.test to MariaDB to be able to ensure we can handle all cases that MySQL can. - I have run all tests with --mysqld=--optimizer_prune_level=1 to verify that there were no test changes. 
--- mysql-test/include/check_qep.inc | 57 + .../include/execute_with_statistics.inc | 30 + mysql-test/include/expect_qep.inc | 45 + mysql-test/main/greedy_optimizer.result | 2220 ++++++- mysql-test/main/greedy_optimizer.test | 573 +- mysql-test/main/join.result | 4 +- mysql-test/main/join.test | 4 +- mysql-test/main/join_nested.result | 2 +- mysql-test/main/join_nested_jcl6.result | 2 +- mysql-test/main/join_outer_innodb.result | 8 +- mysql-test/main/mysqld--help.result | 7 +- mysql-test/main/opt_trace.result | 5246 +++++++++-------- mysql-test/main/opt_trace.test | 1 + mysql-test/main/opt_trace_index_merge.result | 34 +- .../main/opt_trace_index_merge_innodb.result | 51 +- mysql-test/main/opt_trace_security.result | 68 +- mysql-test/main/selectivity_innodb.result | 4 +- mysql-test/main/stat_tables.result | 2 +- mysql-test/main/stat_tables_innodb.result | 2 +- mysql-test/main/subselect2.result | 14 +- mysql-test/main/subselect2.test | 4 +- mysql-test/main/subselect_mat.result | 15 + mysql-test/main/subselect_sj_mat.result | 15 + mysql-test/main/subselect_sj_mat.test | 1 + .../r/optimizer_prune_level_basic.result | 44 +- .../sys_vars/r/sysvars_server_embedded.result | 4 +- .../r/sysvars_server_notembedded.result | 4 +- .../t/optimizer_prune_level_basic.test | 14 +- sql/my_json_writer.cc | 2 - sql/opt_split.cc | 2 +- sql/sql_select.cc | 281 +- sql/sql_select.h | 4 +- sql/sys_vars.cc | 5 +- 33 files changed, 6187 insertions(+), 2582 deletions(-) create mode 100644 mysql-test/include/check_qep.inc create mode 100644 mysql-test/include/execute_with_statistics.inc create mode 100644 mysql-test/include/expect_qep.inc diff --git a/mysql-test/include/check_qep.inc b/mysql-test/include/check_qep.inc new file mode 100644 index 00000000000..ee5a4025400 --- /dev/null +++ b/mysql-test/include/check_qep.inc @@ -0,0 +1,57 @@ +# include/check_qep.inc +# +# SUMMARY +# +# Designed to be used together with include/expect_qep.inc +# +# $query should be assigned a select statement using 
+# straight_join to force the tables to be joined in the most +# optimal order. +# +# expect_qep.inc will then store the estimated 'Last_query_cost' +# and total number of 'Handler_read%' for this straight_joined query. +# +# We should then assign a non-straight_join'ed version of +# the same query to $query and execute it using +# 'include/check_qep.inc'. Its estimated cost and +# #handler_reads will then be verified against the +# previous straight_joined query. +# +# USAGE +# +# let $query= ; +# --source include/check_qep.inc +# +# EXAMPLE +# t/greedy_optimizer.test +# + +flush status; +eval EXPLAIN $query; +eval $query; + +let $cost= + query_get_value(SHOW STATUS LIKE 'Last_query_cost', Value, 1); + +--disable_warnings +let $reads= +`select sum(variable_value) + from information_schema.session_status + where VARIABLE_NAME like 'Handler_read%'`; +--enable_warnings + +#echo Cost: $cost, Handler_reads: $reads; + +if ($cost != $best_cost) +{ echo ### FAILED: Query_cost: $cost, expected: $best_cost ###; +} +# Differences in handler reads are ok as tables in MariaDB are sorted according +# to order in the query and the tables in greedy_optimizer.inc have references to +# rows that do not exist, so different table orders will do different +# number of reads + +if ($reads != $best_reads) +{ echo ### NOTE: Handler_reads: $reads, expected: $best_reads ###; +} diff --git a/mysql-test/include/execute_with_statistics.inc b/mysql-test/include/execute_with_statistics.inc new file mode 100644 index 00000000000..c2305fe5247 --- /dev/null +++ b/mysql-test/include/execute_with_statistics.inc @@ -0,0 +1,30 @@ +# include/execute_with_statistics.inc +# +# SUMMARY +# +# Explain and execute the select statement in $query. +# Then report 'Last_query_cost' estimate from the query +# optimizer and total number of 'Handler_read%' when the +# query was executed. +# Intended usage is to verify that there are no regressions +# in either calculated or actual cost for $query. 
+# +# USAGE +# +# let $query= ; +# --source include/expect_qep.inc +# let $query=